//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

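// As a worked example (an illustrative reading, not additional ADL syntax),
// the first definition below,
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the Java and C conventions,
// spilled and reloaded as an integer (Op_RegI), with hardware encoding 0
// (the bit-pattern placed into opcode fields), backed by the VMReg handle
// for rax. The companion RAX_H entry names the upper half of the 64-bit
// register pair.
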
// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// We previously set RBX, RSI, and RDI as save-on-entry for Java code, then
// turned SOE off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

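// As an illustration of these heuristics, chunk0 below starts with the
// scratch registers R10 and R11, which appear in no fixed calling
// sequence, places RAX and RBP well down the list, and ends with RSP,
// which is never available for allocation. (An illustrative reading of
// the ordering, not a normative rule.)
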
alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words a-b hold a Double.
// The whole registers are used by SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX when UseAVX is set).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX when UseAVX is set).
//
// Linux ABI:   no XMM registers are preserved across function calls;
//              XMM0-XMM7 may hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 may hold parameters.
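//
// For example, a Float occupies only XMM0 (word a), a Double occupies
// XMM0 and XMM0b (words a-b), and a full 512-bit ZMM value spans XMM0
// through XMM0p. (An illustrative reading of the definitions below.)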

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
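// K0 is deliberately absent from the definitions below: an EVEX
// mask-selector of zero means "no masking", so k0 cannot serve as an
// allocatable write mask.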
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

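// The classes below are defined by C++ bodies that return register masks
// computed at VM startup (presumably in this file's reg_mask_init()), so
// their contents can depend on CPU features such as the APX extended GPRs.
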
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// The flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
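// reg_class_dynamic selects between the two underlying classes using the
// trailing predicate: the first (EVEX) class when the predicate holds,
// the second (legacy) class otherwise.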
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for evex 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
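// A CastLL can be implemented with 32-bit immediate compares when each
// finite bound of its long type fits in a signed 32-bit immediate; an
// unbounded end (min_jlong/max_jlong) needs no check at all.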
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
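// When compressed oops are in use, R12 holds the heap base and must be
// excluded from the allocatable register set.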
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
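  // The extended general-purpose registers r16-r31 are only available
  // when Intel APX is enabled (UseAPX); otherwise they are stripped from
  // the register masks below.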
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
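    // RBP serves as the frame pointer and must stay out of the allocatable set.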
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
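  // RSP is the stack pointer and R15 is reserved as the thread register;
  // neither may be allocated to hold a pointer value.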
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when the compiled code uses wide vectors or asks
  // for the upper AVX state to be cleared.
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());
}
1629
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
}
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
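                   // = 10-byte movq loading the inline-cache immediate + 5-byte call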
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
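                   // = 10-byte movq of the 64-bit target address + 3-byte indirect call through r10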
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
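// Materialize the three-way result of a preceding ucomis[s/d] in dst:
// -1 if less than or unordered (NaN), 0 if equal, +1 if greater.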
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
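  // Both operands compare equal to 0.0, so they are +0.0 and/or -0.0.
  // OR-ing the bit patterns yields -0.0 for min if either input is -0.0;
  // AND-ing yields +0.0 for max unless both inputs are -0.0.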
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
1906 // NOTE: We set the table base offset here because users might be
1907 // emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the word for the return address, which has already been
  // pushed, and the word for RBP.
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the word for the return address, which has already been
  // pushed, and the word for RBP.
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
2014 enum RC {
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
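      // 32-bit mem-mem move: borrow rax as scratch, saving its value just
      // below rsp for the duration of the copy.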
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, src_offset), Assembler::AVX_512bit);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, -64), Assembler::AVX_512bit);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
    case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
                src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 int src_offset = ra_->reg2offset(src_first);
2153 int dst_offset = ra_->reg2offset(dst_first);
2154 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2155 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2156 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2157 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2158 int stack_offset = ra_->reg2offset(dst_first);
2159 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2160 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2161 int stack_offset = ra_->reg2offset(src_first);
2162 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill copy combination");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
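  // leaq reg, [rsp + offset]: prefix + opcode + ModRM + SIB plus a disp8
  // or disp32; register encodings above 15 require the 2-byte REX2 prefix.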
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2637 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2638 }
2639
2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2641 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2642 }
2643
2644 #ifdef ASSERT
2645 static bool is_ndd_demotable(const MachNode* mdef) {
2646 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2647 }
2648 #endif
2649
2650 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2651 int oper_index) {
2652 if (mdef == nullptr) {
2653 return false;
2654 }
2655
2656 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2657 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2658 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2659 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2660 return false;
2661 }
2662
2663 // A complex memory operand covers multiple incoming edges needed for
2664 // address computation. Biasing the def towards any address component will
2665 // not result in NDD demotion by the assembler.
2666 if (mdef->operand_num_edges(oper_index) != 1) {
2667 return false;
2668 }
2669
2670 // The demotion candidate's register mask must be compatible with the definition's.
2671 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2672 if (!oper_mask.overlap(mdef->out_RegMask())) {
2673 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2674 return false;
2675 }
2676
2677 switch (oper_index) {
2678 // The first operand of a MachNode corresponding to an Intel APX NDD
2679 // selection pattern can share its assigned register with the definition
2680 // operand if their live ranges do not overlap. In such a scenario we can
2681 // demote it to a legacy map0/map1 instruction by replacing its 4-byte
2682 // extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
2683 // candidates are decorated with a special flag by the instruction selector.
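// Illustrative example (pseudo-assembly, hypothetical registers):
// the NDD form "add rbx, rbx, rcx" (rbx := rbx + rcx) carries a
// 4-byte EVEX prefix; once the destination and the first source
// share a register it can be demoted to the legacy two-operand
// form "add rbx, rcx" with a shorter REX/REX2 encoding.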
2684 case 1:
2685 return is_ndd_demotable_opr1(mdef);
2686
2687 // The definition operand of a commutative operation can be biased towards
2688 // its second operand.
2689 case 2:
2690 return is_ndd_demotable_opr2(mdef);
2691
2692 // The current scheme selects at most two biasing candidates.
2693 default:
2694 assert(false, "unhandled operand index: %s", mdef->Name());
2695 break;
2696 }
2697
2698 return false;
2699 }
2700
2701 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2702 assert(EnableVectorSupport, "sanity");
2703 int lo = XMM0_num;
2704 int hi = XMM0b_num;
2705 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2706 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2707 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2708 return OptoRegPair(hi, lo);
2709 }
2710
2711 // Is this branch offset short enough that a short branch can be used?
2712 //
2713 // NOTE: If the platform does not provide any short branch variants, then
2714 // this method should return false for offset 0.
2715 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2716 // The passed offset is relative to the address of the branch.
2717 // On x86 a branch displacement is calculated relative to the address
2718 // of the next instruction.
2719 offset -= br_size;
2720
2721 // The short version of jmpConUCF2 contains multiple branches,
2722 // making the reach slightly shorter.
2723 if (rule == jmpConUCF2_rule)
2724 return (-126 <= offset && offset <= 125);
2725 return (-128 <= offset && offset <= 127);
2726 }
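// For example (illustrative): a 2-byte short branch at code offset 100
// targeting offset 120 is passed offset == 20; subtracting br_size gives
// a displacement of 18, which fits in the signed 8-bit field.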
2727
2728 // Return whether or not this register is ever used as an argument.
2729 // This function is used on startup to build the trampoline stubs in
2730 // generateOptoStub. Registers not mentioned will be killed by the VM
2731 // call in the trampoline, and arguments in those registers will not be
2732 // available to the callee.
2733 bool Matcher::can_be_java_arg(int reg)
2734 {
2735 return
2736 reg == RDI_num || reg == RDI_H_num ||
2737 reg == RSI_num || reg == RSI_H_num ||
2738 reg == RDX_num || reg == RDX_H_num ||
2739 reg == RCX_num || reg == RCX_H_num ||
2740 reg == R8_num || reg == R8_H_num ||
2741 reg == R9_num || reg == R9_H_num ||
2742 reg == R12_num || reg == R12_H_num ||
2743 reg == XMM0_num || reg == XMM0b_num ||
2744 reg == XMM1_num || reg == XMM1b_num ||
2745 reg == XMM2_num || reg == XMM2b_num ||
2746 reg == XMM3_num || reg == XMM3b_num ||
2747 reg == XMM4_num || reg == XMM4b_num ||
2748 reg == XMM5_num || reg == XMM5b_num ||
2749 reg == XMM6_num || reg == XMM6b_num ||
2750 reg == XMM7_num || reg == XMM7b_num;
2751 }
2752
2753 bool Matcher::is_spillable_arg(int reg)
2754 {
2755 return can_be_java_arg(reg);
2756 }
2757
2758 uint Matcher::int_pressure_limit()
2759 {
2760 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2761 }
2762
2763 uint Matcher::float_pressure_limit()
2764 {
2765 // After experimenting with different values, the following default threshold
2766 // works best for LCM's register pressure scheduling on x64.
2767 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2768 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2769 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2770 }
2771
2772 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
2773 // In 64-bit mode, code that uses a multiply when the
2774 // divisor is constant is faster than the hardware
2775 // DIV instruction (it uses MulHiL).
2776 return false;
2777 }
2778
2779 // Register for DIVI projection of divmodI
2780 const RegMask& Matcher::divI_proj_mask() {
2781 return INT_RAX_REG_mask();
2782 }
2783
2784 // Register for MODI projection of divmodI
2785 const RegMask& Matcher::modI_proj_mask() {
2786 return INT_RDX_REG_mask();
2787 }
2788
2789 // Register for DIVL projection of divmodL
2790 const RegMask& Matcher::divL_proj_mask() {
2791 return LONG_RAX_REG_mask();
2792 }
2793
2794 // Register for MODL projection of divmodL
2795 const RegMask& Matcher::modL_proj_mask() {
2796 return LONG_RDX_REG_mask();
2797 }
2798
2799 %}
2800
2801 source_hpp %{
2802 // Header information of the source block.
2803 // Method declarations/definitions which are used outside
2804 // the ad-scope can conveniently be defined here.
2805 //
2806 // To keep related declarations/definitions/uses close together,
2807 // we switch between source %{ %} and source_hpp %{ %} freely as needed.
2808
2809 #include "runtime/vm_version.hpp"
2810
2811 class NativeJump;
2812
2813 class CallStubImpl {
2814
2815 //--------------------------------------------------------------
2816 //---< Used for optimization in Compile::shorten_branches >---
2817 //--------------------------------------------------------------
2818
2819 public:
2820 // Size of call trampoline stub.
2821 static uint size_call_trampoline() {
2822 return 0; // no call trampolines on this platform
2823 }
2824
2825 // number of relocations needed by a call trampoline stub
2826 static uint reloc_call_trampoline() {
2827 return 0; // no call trampolines on this platform
2828 }
2829 };
2830
2831 class HandlerImpl {
2832
2833 public:
2834
2835 static int emit_deopt_handler(C2_MacroAssembler* masm);
2836
2837 static uint size_deopt_handler() {
2838 // one call (5 bytes) and one short jmp (2 bytes).
2839 return 7;
2840 }
2841 };
2842
2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2844 switch (bytes) {
2845 case 4: // fall-through
2846 case 8: // fall-through
2847 case 16: return Assembler::AVX_128bit;
2848 case 32: return Assembler::AVX_256bit;
2849 case 64: return Assembler::AVX_512bit;
2850
2851 default: {
2852 ShouldNotReachHere();
2853 return Assembler::AVX_NoVec;
2854 }
2855 }
2856 }
2857
2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2859 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2860 }
2861
2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2863 uint def_idx = use->operand_index(opnd);
2864 Node* def = use->in(def_idx);
2865 return vector_length_encoding(def);
2866 }
2867
2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
2869 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2870 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2871 }
2872
2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2874 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2875 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2876 }
2877
2878 class Node::PD {
2879 public:
2880 enum NodeFlags : uint64_t {
2881 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2882 Flag_sets_carry_flag = Node::_last_flag << 2,
2883 Flag_sets_parity_flag = Node::_last_flag << 3,
2884 Flag_sets_zero_flag = Node::_last_flag << 4,
2885 Flag_sets_overflow_flag = Node::_last_flag << 5,
2886 Flag_sets_sign_flag = Node::_last_flag << 6,
2887 Flag_clears_carry_flag = Node::_last_flag << 7,
2888 Flag_clears_parity_flag = Node::_last_flag << 8,
2889 Flag_clears_zero_flag = Node::_last_flag << 9,
2890 Flag_clears_overflow_flag = Node::_last_flag << 10,
2891 Flag_clears_sign_flag = Node::_last_flag << 11,
2892 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2893 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2894 _last_flag = Flag_ndd_demotable_opr2
2895 };
2896 };
2897
2898 %} // end source_hpp
2899
2900 source %{
2901
2902 #include "opto/addnode.hpp"
2903 #include "c2_intelJccErratum_x86.hpp"
2904
2905 void PhaseOutput::pd_perform_mach_node_analysis() {
2906 if (VM_Version::has_intel_jcc_erratum()) {
2907 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2908 _buf_sizes._code += extra_padding;
2909 }
2910 }
2911
2912 int MachNode::pd_alignment_required() const {
2913 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2914 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2915 return IntelJccErratum::largest_jcc_size() + 1;
2916 } else {
2917 return 1;
2918 }
2919 }
2920
2921 int MachNode::compute_padding(int current_offset) const {
2922 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2923 Compile* C = Compile::current();
2924 PhaseOutput* output = C->output();
2925 Block* block = output->block();
2926 int index = output->index();
2927 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2928 } else {
2929 return 0;
2930 }
2931 }
2932
2933 // Emit deopt handler code.
2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2935
2936 // Note that the code buffer's insts_mark is always relative to insts.
2937 // That's why we must use the macroassembler to generate a handler.
2938 address base = __ start_a_stub(size_deopt_handler());
2939 if (base == nullptr) {
2940 ciEnv::current()->record_failure("CodeCache is full");
2941 return 0; // CodeBuffer::expand failed
2942 }
2943 int offset = __ offset();
2944
2945 Label start;
2946 __ bind(start);
2947
2948 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2949
2950 int entry_offset = __ offset();
2951
2952 __ jmp(start);
2953
2954 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2955 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2956 "out of bounds read in post-call NOP check");
2957 __ end_a_stub();
2958 return entry_offset;
2959 }
2960
2961 static Assembler::Width widthForType(BasicType bt) {
2962 if (bt == T_BYTE) {
2963 return Assembler::B;
2964 } else if (bt == T_SHORT) {
2965 return Assembler::W;
2966 } else if (bt == T_INT) {
2967 return Assembler::D;
2968 } else {
2969 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2970 return Assembler::Q;
2971 }
2972 }
2973
2974 //=============================================================================
2975
2976 // Float masks come from different places depending on platform.
2977 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2978 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2979 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2980 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2981 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2982 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2983 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2984 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2985 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2986 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2987 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2988 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2989 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2990 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2991 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2992 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2993 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2994 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2995 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2996
2997 //=============================================================================
2998 bool Matcher::match_rule_supported(int opcode) {
2999 if (!has_match_rule(opcode)) {
3000 return false; // no match rule present
3001 }
3002 switch (opcode) {
3003 case Op_AbsVL:
3004 case Op_StoreVectorScatter:
3005 if (UseAVX < 3) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountI:
3010 case Op_PopCountL:
3011 if (!UsePopCountInstruction) {
3012 return false;
3013 }
3014 break;
3015 case Op_PopCountVI:
3016 if (UseAVX < 2) {
3017 return false;
3018 }
3019 break;
3020 case Op_CompressV:
3021 case Op_ExpandV:
3022 case Op_PopCountVL:
3023 if (UseAVX < 2) {
3024 return false;
3025 }
3026 break;
3027 case Op_MulVI:
3028 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3029 return false;
3030 }
3031 break;
3032 case Op_MulVL:
3033 if (UseSSE < 4) { // only with SSE4_1 or AVX
3034 return false;
3035 }
3036 break;
3037 case Op_MulReductionVL:
3038 if (!VM_Version::supports_avx512dq()) {
3039 return false;
3040 }
3041 break;
3042 case Op_AbsVB:
3043 case Op_AbsVS:
3044 case Op_AbsVI:
3045 case Op_AddReductionVI:
3046 case Op_AndReductionV:
3047 case Op_OrReductionV:
3048 case Op_XorReductionV:
3049 if (UseSSE < 3) { // requires at least SSSE3
3050 return false;
3051 }
3052 break;
3053 case Op_MaxHF:
3054 case Op_MinHF:
3055 if (!VM_Version::supports_avx512vlbw()) {
3056 return false;
3057 } // fallthrough
3058 case Op_AddHF:
3059 case Op_DivHF:
3060 case Op_FmaHF:
3061 case Op_MulHF:
3062 case Op_ReinterpretS2HF:
3063 case Op_ReinterpretHF2S:
3064 case Op_SubHF:
3065 case Op_SqrtHF:
3066 if (!VM_Version::supports_avx512_fp16()) {
3067 return false;
3068 }
3069 break;
3070 case Op_VectorLoadShuffle:
3071 case Op_VectorRearrange:
3072 case Op_MulReductionVI:
3073 if (UseSSE < 4) { // requires at least SSE4
3074 return false;
3075 }
3076 break;
3077 case Op_IsInfiniteF:
3078 case Op_IsInfiniteD:
3079 if (!VM_Version::supports_avx512dq()) {
3080 return false;
3081 }
3082 break;
3083 case Op_SqrtVD:
3084 case Op_SqrtVF:
3085 case Op_VectorMaskCmp:
3086 case Op_VectorCastB2X:
3087 case Op_VectorCastS2X:
3088 case Op_VectorCastI2X:
3089 case Op_VectorCastL2X:
3090 case Op_VectorCastF2X:
3091 case Op_VectorCastD2X:
3092 case Op_VectorUCastB2X:
3093 case Op_VectorUCastS2X:
3094 case Op_VectorUCastI2X:
3095 case Op_VectorMaskCast:
3096 if (UseAVX < 1) { // enabled for AVX only
3097 return false;
3098 }
3099 break;
3100 case Op_PopulateIndex:
3101 if (UseAVX < 2) {
3102 return false;
3103 }
3104 break;
3105 case Op_RoundVF:
3106 if (UseAVX < 2) { // enabled for AVX2 only
3107 return false;
3108 }
3109 break;
3110 case Op_RoundVD:
3111 if (UseAVX < 3) {
3112 return false; // enabled for AVX3 only
3113 }
3114 break;
3115 case Op_CompareAndSwapL:
3116 case Op_CompareAndSwapP:
3117 break;
3118 case Op_StrIndexOf:
3119 if (!UseSSE42Intrinsics) {
3120 return false;
3121 }
3122 break;
3123 case Op_StrIndexOfChar:
3124 if (!UseSSE42Intrinsics) {
3125 return false;
3126 }
3127 break;
3128 case Op_OnSpinWait:
3129 if (!VM_Version::supports_on_spin_wait()) {
3130 return false;
3131 }
3132 break;
3133 case Op_MulVB:
3134 case Op_LShiftVB:
3135 case Op_RShiftVB:
3136 case Op_URShiftVB:
3137 case Op_VectorInsert:
3138 case Op_VectorLoadMask:
3139 case Op_VectorStoreMask:
3140 case Op_VectorBlend:
3141 if (UseSSE < 4) {
3142 return false;
3143 }
3144 break;
3145 case Op_MaxD:
3146 case Op_MaxF:
3147 case Op_MinD:
3148 case Op_MinF:
3149 if (UseAVX < 1) { // enabled for AVX only
3150 return false;
3151 }
3152 break;
3153 case Op_CacheWB:
3154 case Op_CacheWBPreSync:
3155 case Op_CacheWBPostSync:
3156 if (!VM_Version::supports_data_cache_line_flush()) {
3157 return false;
3158 }
3159 break;
3160 case Op_ExtractB:
3161 case Op_ExtractL:
3162 case Op_ExtractI:
3163 case Op_RoundDoubleMode:
3164 if (UseSSE < 4) {
3165 return false;
3166 }
3167 break;
3168 case Op_RoundDoubleModeV:
3169 if (!VM_Version::supports_avx()) {
3170 return false; // 128bit vroundpd is not available
3171 }
3172 break;
3173 case Op_LoadVectorGather:
3174 case Op_LoadVectorGatherMasked:
3175 if (UseAVX < 2) {
3176 return false;
3177 }
3178 break;
3179 case Op_FmaF:
3180 case Op_FmaD:
3181 case Op_FmaVD:
3182 case Op_FmaVF:
3183 if (!UseFMA) {
3184 return false;
3185 }
3186 break;
3187 case Op_MacroLogicV:
3188 if (UseAVX < 3 || !UseVectorMacroLogic) {
3189 return false;
3190 }
3191 break;
3192
3193 case Op_VectorCmpMasked:
3194 case Op_VectorMaskGen:
3195 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3196 return false;
3197 }
3198 break;
3199 case Op_VectorMaskFirstTrue:
3200 case Op_VectorMaskLastTrue:
3201 case Op_VectorMaskTrueCount:
3202 case Op_VectorMaskToLong:
3203 if (UseAVX < 1) {
3204 return false;
3205 }
3206 break;
3207 case Op_RoundF:
3208 case Op_RoundD:
3209 break;
3210 case Op_CopySignD:
3211 case Op_CopySignF:
3212 if (UseAVX < 3) {
3213 return false;
3214 }
3215 if (!VM_Version::supports_avx512vl()) {
3216 return false;
3217 }
3218 break;
3219 case Op_CompressBits:
3220 case Op_ExpandBits:
3221 if (!VM_Version::supports_bmi2()) {
3222 return false;
3223 }
3224 break;
3225 case Op_CompressM:
3226 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3227 return false;
3228 }
3229 break;
3230 case Op_ConvF2HF:
3231 case Op_ConvHF2F:
3232 if (!VM_Version::supports_float16()) {
3233 return false;
3234 }
3235 break;
3236 case Op_VectorCastF2HF:
3237 case Op_VectorCastHF2F:
3238 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3239 return false;
3240 }
3241 break;
3242 }
3243 return true; // Match rules are supported by default.
3244 }
3245
3246 //------------------------------------------------------------------------
3247
3248 static inline bool is_pop_count_instr_target(BasicType bt) {
3249 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3250 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3251 }
3252
3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3254 return match_rule_supported_vector(opcode, vlen, bt);
3255 }
3256
3257 // Identify extra cases that we might want to provide match rules for vector nodes and
3258 // other intrinsics guarded with vector length (vlen) and element type (bt).
3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3260 if (!match_rule_supported(opcode)) {
3261 return false;
3262 }
3263 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3264 // * SSE2 supports 128bit vectors for all types;
3265 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3266 // * AVX2 supports 256bit vectors for all types;
3267 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3268 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3269 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3270 // And MaxVectorSize is taken into account as well.
3271 if (!vector_size_supported(bt, vlen)) {
3272 return false;
3273 }
3274 // Special cases which require vector length follow:
3275 // * implementation limitations
3276 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3277 // * 128bit vroundpd instruction is present only in AVX1
3278 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
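// For example, vlen == 8 with bt == T_FLOAT gives 8 * 4 * 8 == 256 bits.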
3279 switch (opcode) {
3280 case Op_MaxVHF:
3281 case Op_MinVHF:
3282 if (!VM_Version::supports_avx512bw()) {
3283 return false;
3284 } // fallthrough
3285 case Op_AddVHF:
3286 case Op_DivVHF:
3287 case Op_FmaVHF:
3288 case Op_MulVHF:
3289 case Op_SubVHF:
3290 case Op_SqrtVHF:
3291 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3292 return false;
3293 }
3294 if (!VM_Version::supports_avx512_fp16()) {
3295 return false;
3296 }
3297 break;
3298 case Op_AbsVF:
3299 case Op_NegVF:
3300 if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3301 return false; // 512bit vandps and vxorps are not available
3302 }
3303 break;
3304 case Op_AbsVD:
3305 case Op_NegVD:
3306 if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3307 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3308 }
3309 break;
3310 case Op_RotateRightV:
3311 case Op_RotateLeftV:
3312 if (bt != T_INT && bt != T_LONG) {
3313 return false;
3314 } // fallthrough
3315 case Op_MacroLogicV:
3316 if (!VM_Version::supports_evex() ||
3317 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3318 return false;
3319 }
3320 break;
3321 case Op_ClearArray:
3322 case Op_VectorMaskGen:
3323 case Op_VectorCmpMasked:
3324 if (!VM_Version::supports_avx512bw()) {
3325 return false;
3326 }
3327 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3328 return false;
3329 }
3330 break;
3331 case Op_LoadVectorMasked:
3332 case Op_StoreVectorMasked:
3333 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3334 return false;
3335 }
3336 break;
3337 case Op_UMinV:
3338 case Op_UMaxV:
3339 if (UseAVX == 0) {
3340 return false;
3341 }
3342 break;
3343 case Op_MaxV:
3344 case Op_MinV:
3345 if (UseSSE < 4 && is_integral_type(bt)) {
3346 return false;
3347 }
3348 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3349 // Float/Double intrinsics are enabled for AVX family currently.
3350 if (UseAVX == 0) {
3351 return false;
3352 }
3353 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3354 return false;
3355 }
3356 }
3357 break;
3358 case Op_CallLeafVector:
3359 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3360 return false;
3361 }
3362 break;
3363 case Op_AddReductionVI:
3364 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3365 return false;
3366 }
3367 // fallthrough
3368 case Op_AndReductionV:
3369 case Op_OrReductionV:
3370 case Op_XorReductionV:
3371 if (is_subword_type(bt) && (UseSSE < 4)) {
3372 return false;
3373 }
3374 break;
3375 case Op_MinReductionV:
3376 case Op_MaxReductionV:
3377 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3378 return false;
3379 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3380 return false;
3381 }
3382 // Float/Double intrinsics enabled for AVX family.
3383 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3384 return false;
3385 }
3386 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3387 return false;
3388 }
3389 break;
3390 case Op_VectorBlend:
3391 if (UseAVX == 0 && size_in_bits < 128) {
3392 return false;
3393 }
3394 break;
3395 case Op_VectorTest:
3396 if (UseSSE < 4) {
3397 return false; // Implementation limitation
3398 } else if (size_in_bits < 32) {
3399 return false; // Implementation limitation
3400 }
3401 break;
3402 case Op_VectorLoadShuffle:
3403 case Op_VectorRearrange:
3404 if (vlen == 2) {
3405 return false; // Implementation limitation due to how shuffle is loaded
3406 } else if (size_in_bits == 256 && UseAVX < 2) {
3407 return false; // Implementation limitation
3408 }
3409 break;
3410 case Op_VectorLoadMask:
3411 case Op_VectorMaskCast:
3412 if (size_in_bits == 256 && UseAVX < 2) {
3413 return false; // Implementation limitation
3414 }
3415 // fallthrough
3416 case Op_VectorStoreMask:
3417 if (vlen == 2) {
3418 return false; // Implementation limitation
3419 }
3420 break;
3421 case Op_PopulateIndex:
3422 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3423 return false;
3424 }
3425 break;
3426 case Op_VectorCastB2X:
3427 case Op_VectorCastS2X:
3428 case Op_VectorCastI2X:
3429 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3430 return false;
3431 }
3432 break;
3433 case Op_VectorCastL2X:
3434 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3435 return false;
3436 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3437 return false;
3438 }
3439 break;
3440 case Op_VectorCastF2X: {
3441 // As per JLS section 5.1.3, narrowing conversions to sub-word types
3442 // happen after an intermediate conversion to integer, and the special
3443 // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3444 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3445 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3446 return false;
3447 }
3448 }
3449 // fallthrough
3450 case Op_VectorCastD2X:
3451 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3452 return false;
3453 }
3454 break;
3455 case Op_VectorCastF2HF:
3456 case Op_VectorCastHF2F:
3457 if (!VM_Version::supports_f16c() &&
3458 ((!VM_Version::supports_evex() ||
3459 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3460 return false;
3461 }
3462 break;
3463 case Op_RoundVD:
3464 if (!VM_Version::supports_avx512dq()) {
3465 return false;
3466 }
3467 break;
3468 case Op_MulReductionVI:
3469 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3470 return false;
3471 }
3472 break;
3473 case Op_LoadVectorGatherMasked:
3474 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3475 return false;
3476 }
3477 if (is_subword_type(bt) &&
3478 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3479 (size_in_bits < 64) ||
3480 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3481 return false;
3482 }
3483 break;
3484 case Op_StoreVectorScatterMasked:
3485 case Op_StoreVectorScatter:
3486 if (is_subword_type(bt)) {
3487 return false;
3488 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3489 return false;
3490 }
3491 // fallthrough
3492 case Op_LoadVectorGather:
3493 if (!is_subword_type(bt) && size_in_bits == 64) {
3494 return false;
3495 }
3496 if (is_subword_type(bt) && size_in_bits < 64) {
3497 return false;
3498 }
3499 break;
3500 case Op_SaturatingAddV:
3501 case Op_SaturatingSubV:
3502 if (UseAVX < 1) {
3503 return false; // Implementation limitation
3504 }
3505 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3506 return false;
3507 }
3508 break;
3509 case Op_SelectFromTwoVector:
3510 if (size_in_bits < 128) {
3511 return false;
3512 }
3513 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3514 return false;
3515 }
3516 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3517 return false;
3518 }
3519 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3520 return false;
3521 }
3522 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3523 return false;
3524 }
3525 break;
3526 case Op_MaskAll:
3527 if (!VM_Version::supports_evex()) {
3528 return false;
3529 }
3530 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3531 return false;
3532 }
3533 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
3536 break;
3537 case Op_VectorMaskCmp:
3538 if (vlen < 2 || size_in_bits < 32) {
3539 return false;
3540 }
3541 break;
3542 case Op_CompressM:
3543 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3544 return false;
3545 }
3546 break;
3547 case Op_CompressV:
3548 case Op_ExpandV:
3549 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3550 return false;
3551 }
3552 if (size_in_bits < 128) {
3553 return false;
3554 } // fallthrough
3555 case Op_VectorLongToMask:
3556 if (UseAVX < 1) {
3557 return false;
3558 }
3559 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3560 return false;
3561 }
3562 break;
3563 case Op_SignumVD:
3564 case Op_SignumVF:
3565 if (UseAVX < 1) {
3566 return false;
3567 }
3568 break;
3569 case Op_PopCountVI:
3570 case Op_PopCountVL: {
3571 if (!is_pop_count_instr_target(bt) &&
3572 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3573 return false;
3574 }
3575 }
3576 break;
3577 case Op_ReverseV:
3578 case Op_ReverseBytesV:
3579 if (UseAVX < 2) {
3580 return false;
3581 }
3582 break;
3583 case Op_CountTrailingZerosV:
3584 case Op_CountLeadingZerosV:
3585 if (UseAVX < 2) {
3586 return false;
3587 }
3588 break;
3589 }
3590 return true; // Per default match rules are supported.
3591 }
3592
3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3594 // The ADLC-based match_rule_supported routine checks for the existence of a pattern
3595 // based on the IR opcode. Most of the unary/binary/ternary masked operations share
3596 // the IR nodes of their non-masked counterparts, with the mask edge being the
3597 // differentiator. This routine does a strict check on the existence of masked
3598 // operation patterns by returning a default false value for all opcodes apart from
3599 // the ones whose masked instruction patterns are defined in this file.
3600 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3601 return false;
3602 }
3603
3604 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3605 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3606 return false;
3607 }
3608 switch (opcode) {
3609 // Unary masked operations
3610 case Op_AbsVB:
3611 case Op_AbsVS:
3612 if (!VM_Version::supports_avx512bw()) {
3613 return false; // Implementation limitation
3614 } // fallthrough
3615 case Op_AbsVI:
3616 case Op_AbsVL:
3617 return true;
3618
3619 // Ternary masked operations
3620 case Op_FmaVF:
3621 case Op_FmaVD:
3622 return true;
3623
3624 case Op_MacroLogicV:
3625 if (bt != T_INT && bt != T_LONG) {
3626 return false;
3627 }
3628 return true;
3629
3630 // Binary masked operations
3631 case Op_AddVB:
3632 case Op_AddVS:
3633 case Op_SubVB:
3634 case Op_SubVS:
3635 case Op_MulVS:
3636 case Op_LShiftVS:
3637 case Op_RShiftVS:
3638 case Op_URShiftVS:
3639 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3640 if (!VM_Version::supports_avx512bw()) {
3641 return false; // Implementation limitation
3642 }
3643 return true;
3644
3645 case Op_MulVL:
3646 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3647 if (!VM_Version::supports_avx512dq()) {
3648 return false; // Implementation limitation
3649 }
3650 return true;
3651
3652 case Op_AndV:
3653 case Op_OrV:
3654 case Op_XorV:
3655 case Op_RotateRightV:
3656 case Op_RotateLeftV:
3657 if (bt != T_INT && bt != T_LONG) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_VectorLoadMask:
3663 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3664 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3665 return false;
3666 }
3667 return true;
3668
3669 case Op_AddVI:
3670 case Op_AddVL:
3671 case Op_AddVF:
3672 case Op_AddVD:
3673 case Op_SubVI:
3674 case Op_SubVL:
3675 case Op_SubVF:
3676 case Op_SubVD:
3677 case Op_MulVI:
3678 case Op_MulVF:
3679 case Op_MulVD:
3680 case Op_DivVF:
3681 case Op_DivVD:
3682 case Op_SqrtVF:
3683 case Op_SqrtVD:
3684 case Op_LShiftVI:
3685 case Op_LShiftVL:
3686 case Op_RShiftVI:
3687 case Op_RShiftVL:
3688 case Op_URShiftVI:
3689 case Op_URShiftVL:
3690 case Op_LoadVectorMasked:
3691 case Op_StoreVectorMasked:
3692 case Op_LoadVectorGatherMasked:
3693 case Op_StoreVectorScatterMasked:
3694 return true;
3695
3696 case Op_UMinV:
3697 case Op_UMaxV:
3698 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3699 return false;
3700 } // fallthrough
3701 case Op_MaxV:
3702 case Op_MinV:
3703 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3704 return false; // Implementation limitation
3705 }
3706 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3707 return false; // Implementation limitation
3708 }
3709 return true;
3710 case Op_SaturatingAddV:
3711 case Op_SaturatingSubV:
3712 if (!is_subword_type(bt)) {
3713 return false;
3714 }
3715 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719
3720 case Op_VectorMaskCmp:
3721 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorRearrange:
3727 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3731 return false; // Implementation limitation
3732 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 // Binary Logical operations
3738 case Op_AndVMask:
3739 case Op_OrVMask:
3740 case Op_XorVMask:
3741 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 case Op_PopCountVI:
3747 case Op_PopCountVL:
3748 if (!is_pop_count_instr_target(bt)) {
3749 return false;
3750 }
3751 return true;
3752
3753 case Op_MaskAll:
3754 return true;
3755
3756 case Op_CountLeadingZerosV:
3757 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3758 return true;
3759 } // fallthrough
3760 default:
3761 return false;
3762 }
3763 }
3764
3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3766 return false;
3767 }
3768
3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3771 switch (elem_bt) {
3772 case T_BYTE: return false;
3773 case T_SHORT: return !VM_Version::supports_avx512bw();
3774 case T_INT: return !VM_Version::supports_avx();
3775 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3776 default:
3777 ShouldNotReachHere();
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3783 // Prefer predicate if the mask type is "TypeVectMask".
3784 return vt->isa_vectmask() != nullptr;
3785 }
3786
3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3788 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3789 bool legacy = (generic_opnd->opcode() == LEGVEC);
3790 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3791 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3792 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3793 return new legVecZOper();
3794 }
3795 if (legacy) {
3796 switch (ideal_reg) {
3797 case Op_VecS: return new legVecSOper();
3798 case Op_VecD: return new legVecDOper();
3799 case Op_VecX: return new legVecXOper();
3800 case Op_VecY: return new legVecYOper();
3801 case Op_VecZ: return new legVecZOper();
3802 }
3803 } else {
3804 switch (ideal_reg) {
3805 case Op_VecS: return new vecSOper();
3806 case Op_VecD: return new vecDOper();
3807 case Op_VecX: return new vecXOper();
3808 case Op_VecY: return new vecYOper();
3809 case Op_VecZ: return new vecZOper();
3810 }
3811 }
3812 ShouldNotReachHere();
3813 return nullptr;
3814 }
3815
3816 bool Matcher::is_reg2reg_move(MachNode* m) {
3817 switch (m->rule()) {
3818 case MoveVec2Leg_rule:
3819 case MoveLeg2Vec_rule:
3820 case MoveF2VL_rule:
3821 case MoveF2LEG_rule:
3822 case MoveVL2F_rule:
3823 case MoveLEG2F_rule:
3824 case MoveD2VL_rule:
3825 case MoveD2LEG_rule:
3826 case MoveVL2D_rule:
3827 case MoveLEG2D_rule:
3828 return true;
3829 default:
3830 return false;
3831 }
3832 }
3833
3834 bool Matcher::is_generic_vector(MachOper* opnd) {
3835 switch (opnd->opcode()) {
3836 case VEC:
3837 case LEGVEC:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 //------------------------------------------------------------------------
3845
3846 const RegMask* Matcher::predicate_reg_mask(void) {
3847 return &_VECTMASK_REG_mask;
3848 }
3849
3850 // Max vector size in bytes. 0 if not supported.
3851 int Matcher::vector_width_in_bytes(BasicType bt) {
3852 assert(is_java_primitive(bt), "only primitive type vectors");
3853 // SSE2 supports 128bit vectors for all types.
3854 // AVX2 supports 256bit vectors for all types.
3855 // AVX-512/EVEX supports 512bit vectors for all types.
3856 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
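// For example, UseAVX == 2 gives (1 << 2) * 8 == 32 bytes (256 bits)
// and UseAVX == 3 gives 64 bytes (512 bits).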
3857 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3858 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3859 size = (UseAVX > 2) ? 64 : 32;
3860 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3861 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3862 // Use flag to limit vector size.
3863 size = MIN2(size,(int)MaxVectorSize);
3864 // Minimum 2 values in vector (or 4 for bytes).
3865 switch (bt) {
3866 case T_DOUBLE:
3867 case T_LONG:
3868 if (size < 16) return 0;
3869 break;
3870 case T_FLOAT:
3871 case T_INT:
3872 if (size < 8) return 0;
3873 break;
3874 case T_BOOLEAN:
3875 if (size < 4) return 0;
3876 break;
3877 case T_CHAR:
3878 if (size < 4) return 0;
3879 break;
3880 case T_BYTE:
3881 if (size < 4) return 0;
3882 break;
3883 case T_SHORT:
3884 if (size < 4) return 0;
3885 break;
3886 default:
3887 ShouldNotReachHere();
3888 }
3889 return size;
3890 }
3891
3892 // Limits on vector size (number of elements) loaded into vector.
3893 int Matcher::max_vector_size(const BasicType bt) {
3894 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3895 }
3896 int Matcher::min_vector_size(const BasicType bt) {
3897 int max_size = max_vector_size(bt);
3898 // The minimum size which can be loaded into a vector is 4 bytes.
3899 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
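// For example: T_BYTE needs at least 4 elements (4 bytes), while
// T_SHORT and T_INT need only 2 elements (4 and 8 bytes respectively).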
3900 // Support for calling SVML routines on Double64 (single-element) vectors.
3901 if (bt == T_DOUBLE) {
3902 size = 1;
3903 }
3904 return MIN2(size,max_size);
3905 }
3906
3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3908 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3909 // by default on Cascade Lake
3910 if (VM_Version::is_default_intel_cascade_lake()) {
3911 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3912 }
3913 return Matcher::max_vector_size(bt);
3914 }
3915
3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3917 return -1;
3918 }
3919
3920 // Vector ideal reg corresponding to specified size in bytes
3921 uint Matcher::vector_ideal_reg(int size) {
3922 assert(MaxVectorSize >= size, "");
3923 switch (size) {
3924 case 4: return Op_VecS;
3925 case 8: return Op_VecD;
3926 case 16: return Op_VecX;
3927 case 32: return Op_VecY;
3928 case 64: return Op_VecZ;
3929 }
3930 ShouldNotReachHere();
3931 return 0;
3932 }
3933
3934 // Check for shift by small constant as well
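// For example (illustrative), in (AddP base (LShiftX (ConvI2L idx) 2))
// the shift folds into the scaled-index addressing mode [base + idx*4],
// so neither the shift nor the ConvI2L needs a register of its own.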
3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3936 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3937 shift->in(2)->get_int() <= 3 &&
3938 // Are there other uses besides address expressions?
3939 !matcher->is_visited(shift)) {
3940 address_visited.set(shift->_idx); // Flag as address_visited
3941 mstack.push(shift->in(2), Matcher::Visit);
3942 Node *conv = shift->in(1);
3943 // Allow the Matcher to match the rule which bypasses
3944 // the ConvI2L operation for an array index on LP64
3945 // if the index value is positive.
3946 if (conv->Opcode() == Op_ConvI2L &&
3947 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(conv)) {
3950 address_visited.set(conv->_idx); // Flag as address_visited
3951 mstack.push(conv->in(1), Matcher::Pre_Visit);
3952 } else {
3953 mstack.push(conv, Matcher::Pre_Visit);
3954 }
3955 return true;
3956 }
3957 return false;
3958 }
3959
3960 // This function identifies sub-graphs in which a 'load' node is
3961 // input to two different nodes, and such that it can be matched
3962 // with BMI instructions like blsi, blsr, etc.
3963 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3965 // refers to the same node.
3966 //
3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3968 // This is a temporary solution until we make DAGs expressible in ADL.
3969 template<typename ConType>
3970 class FusedPatternMatcher {
3971 Node* _op1_node;
3972 Node* _mop_node;
3973 int _con_op;
3974
3975 static int match_next(Node* n, int next_op, int next_op_idx) {
3976 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3977 return -1;
3978 }
3979
3980 if (next_op_idx == -1) { // n is commutative, try rotations
3981 if (n->in(1)->Opcode() == next_op) {
3982 return 1;
3983 } else if (n->in(2)->Opcode() == next_op) {
3984 return 2;
3985 }
3986 } else {
3987 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3988 if (n->in(next_op_idx)->Opcode() == next_op) {
3989 return next_op_idx;
3990 }
3991 }
3992 return -1;
3993 }
3994
3995 public:
3996 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3997 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3998
3999 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4000 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4001 typename ConType::NativeType con_value) {
4002 if (_op1_node->Opcode() != op1) {
4003 return false;
4004 }
4005 if (_mop_node->outcnt() > 2) {
4006 return false;
4007 }
4008 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4009 if (op1_op2_idx == -1) {
4010 return false;
4011 }
4012 // Memory operation must be the other edge
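// ((1 & 1) + 1 == 2 and (2 & 1) + 1 == 1, i.e. whichever of the
// two inputs is not the op2 edge).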
4013 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4014
4015 // Check that the mop node is really what we want
4016 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4017 Node* op2_node = _op1_node->in(op1_op2_idx);
4018 if (op2_node->outcnt() > 1) {
4019 return false;
4020 }
4021 assert(op2_node->Opcode() == op2, "Should be");
4022 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4023 if (op2_con_idx == -1) {
4024 return false;
4025 }
4026 // Memory operation must be the other edge
4027 int op2_mop_idx = (op2_con_idx & 1) + 1;
4028 // Check that the memory operation is the same node
4029 if (op2_node->in(op2_mop_idx) == _mop_node) {
4030 // Now check the constant
4031 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4032 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4033 return true;
4034 }
4035 }
4036 }
4037 return false;
4038 }
4039 };
4040
4041 static bool is_bmi_pattern(Node* n, Node* m) {
4042 assert(UseBMI1Instructions, "sanity");
4043 if (n != nullptr && m != nullptr) {
4044 if (m->Opcode() == Op_LoadI) {
4045 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4046 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4047 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4048 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4049 } else if (m->Opcode() == Op_LoadL) {
4050 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4051 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4052 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4053 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4054 }
4055 }
4056 return false;
4057 }
4058
4059 // Should the matcher clone input 'm' of node 'n'?
4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4061 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4062 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4063 mstack.push(m, Visit);
4064 return true;
4065 }
4066 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4067 mstack.push(m, Visit); // m = ShiftCntV
4068 return true;
4069 }
4070 if (is_encode_and_store_pattern(n, m)) {
4071 mstack.push(m, Visit);
4072 return true;
4073 }
4074 return false;
4075 }
4076
4077 // Should the Matcher clone shifts on addressing modes, expecting them
4078 // to be subsumed into complex addressing expressions or compute them
4079 // into registers?
4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4081 Node *off = m->in(AddPNode::Offset);
4082 if (off->is_Con()) {
4083 address_visited.test_set(m->_idx); // Flag as address_visited
4084 Node *adr = m->in(AddPNode::Address);
4085
4086 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4087 // AtomicAdd is not an addressing expression.
4088 // Cheap to find it by looking for screwy base.
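// For example (illustrative): in [rbx + rcx*4 + 16] the inner AddP
// supplies the base plus the scaled index, and the outer constant
// offset becomes the immediate displacement.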
4089 if (adr->is_AddP() &&
4090 !adr->in(AddPNode::Base)->is_top() &&
4091 !adr->in(AddPNode::Offset)->is_Con() &&
4092 off->get_long() == (int) (off->get_long()) && // immL32
4093 // Are there other uses besides address expressions?
4094 !is_visited(adr)) {
4095 address_visited.set(adr->_idx); // Flag as address_visited
4096 Node *shift = adr->in(AddPNode::Offset);
4097 if (!clone_shift(shift, this, mstack, address_visited)) {
4098 mstack.push(shift, Pre_Visit);
4099 }
4100 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4101 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4102 } else {
4103 mstack.push(adr, Pre_Visit);
4104 }
4105
4106 // Clone X+offset as it also folds into most addressing expressions
4107 mstack.push(off, Visit);
4108 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4109 return true;
4110 } else if (clone_shift(off, this, mstack, address_visited)) {
4111 address_visited.test_set(m->_idx); // Flag as address_visited
4112 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4113 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4114 return true;
4115 }
4116 return false;
4117 }
4118
4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4120 switch (bt) {
4121 case BoolTest::eq:
4122 return Assembler::eq;
4123 case BoolTest::ne:
4124 return Assembler::neq;
4125 case BoolTest::le:
4126 case BoolTest::ule:
4127 return Assembler::le;
4128 case BoolTest::ge:
4129 case BoolTest::uge:
4130 return Assembler::nlt;
4131 case BoolTest::lt:
4132 case BoolTest::ult:
4133 return Assembler::lt;
4134 case BoolTest::gt:
4135 case BoolTest::ugt:
4136 return Assembler::nle;
4137 default: ShouldNotReachHere(); return Assembler::_false;
4138 }
4139 }
4140
4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4142 switch (bt) {
4143 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4144 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4145 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4146 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4147 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4148 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4149 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4150 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4151 }
4152 }
4153
4154 // Helper methods for MachSpillCopyNode::implementation().
4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4156 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4157 assert(ireg == Op_VecS || // 32bit vector
4158 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4159 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4160 "no non-adjacent vector moves" );
4161 if (masm) {
4162 switch (ireg) {
4163 case Op_VecS: // copy whole register
4164 case Op_VecD:
4165 case Op_VecX:
4166 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4167 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4168 } else {
4169 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4170 }
4171 break;
4172 case Op_VecY:
4173 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4174 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4175 } else {
4176 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4177 }
4178 break;
4179 case Op_VecZ:
4180 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4181 break;
4182 default:
4183 ShouldNotReachHere();
4184 }
4185 #ifndef PRODUCT
4186 } else {
4187 switch (ireg) {
4188 case Op_VecS:
4189 case Op_VecD:
4190 case Op_VecX:
4191 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4192 break;
4193 case Op_VecY:
4194 case Op_VecZ:
4195 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4196 break;
4197 default:
4198 ShouldNotReachHere();
4199 }
4200 #endif
4201 }
4202 }
4203
4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4205 int stack_offset, int reg, uint ireg, outputStream* st) {
4206 if (masm) {
4207 if (is_load) {
4208 switch (ireg) {
4209 case Op_VecS:
4210 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4211 break;
4212 case Op_VecD:
4213 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4214 break;
4215 case Op_VecX:
4216 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4217 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4218 } else {
4219 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4220 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4221 }
4222 break;
4223 case Op_VecY:
4224 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4225 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 } else {
4227 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4228 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4229 }
4230 break;
4231 case Op_VecZ:
4232 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4233 break;
4234 default:
4235 ShouldNotReachHere();
4236 }
4237 } else { // store
4238 switch (ireg) {
4239 case Op_VecS:
4240 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4241 break;
4242 case Op_VecD:
4243 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4244 break;
4245 case Op_VecX:
4246 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4247 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4248 } else {
4250 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4251 }
4252 break;
4253 case Op_VecY:
4254 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4255 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4256 } else {
4258 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4259 }
4260 break;
4261 case Op_VecZ:
4262 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4263 break;
4264 default:
4265 ShouldNotReachHere();
4266 }
4267 }
4268 #ifndef PRODUCT
4269 } else {
4270 if (is_load) {
4271 switch (ireg) {
4272 case Op_VecS:
4273 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4274 break;
4275 case Op_VecD:
4276 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4277 break;
4278 case Op_VecX:
4279 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecY:
4282 case Op_VecZ:
4283 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 } else { // store
4289 switch (ireg) {
4290 case Op_VecS:
4291 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4292 break;
4293 case Op_VecD:
4294 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4295 break;
4296 case Op_VecX:
4297 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecY:
4300 case Op_VecZ:
4301 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 default:
4304 ShouldNotReachHere();
4305 }
4306 }
4307 #endif
4308 }
4309 }
4310
4311 template <class T>
4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4313 int size = type2aelembytes(bt) * len;
4314 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4315 for (int i = 0; i < len; i++) {
4316 int offset = i * type2aelembytes(bt);
4317 switch (bt) {
4318 case T_BYTE: val->at(i) = con; break;
4319 case T_SHORT: {
4320 jshort c = con;
4321 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4322 break;
4323 }
4324 case T_INT: {
4325 jint c = con;
4326 memcpy(val->adr_at(offset), &c, sizeof(jint));
4327 break;
4328 }
4329 case T_LONG: {
4330 jlong c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4332 break;
4333 }
4334 case T_FLOAT: {
4335 jfloat c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4337 break;
4338 }
4339 case T_DOUBLE: {
4340 jdouble c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4342 break;
4343 }
4344 default: assert(false, "%s", type2name(bt));
4345 }
4346 }
4347 return val;
4348 }
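// For example, vreplicate_imm(T_INT, 1, 4) yields the 16 bytes
// 01 00 00 00 repeated four times (little-endian).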
4349
4350 static inline jlong high_bit_set(BasicType bt) {
4351 switch (bt) {
4352 case T_BYTE: return 0x8080808080808080;
4353 case T_SHORT: return 0x8000800080008000;
4354 case T_INT: return 0x8000000080000000;
4355 case T_LONG: return 0x8000000000000000;
4356 default:
4357 ShouldNotReachHere();
4358 return 0;
4359 }
4360 }
4361
4362 #ifndef PRODUCT
4363 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4364 st->print("nop \t# %d bytes pad for loops and calls", _count);
4365 }
4366 #endif
4367
4368 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4369 __ nop(_count);
4370 }
4371
4372 uint MachNopNode::size(PhaseRegAlloc*) const {
4373 return _count;
4374 }
4375
4376 #ifndef PRODUCT
4377 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4378 st->print("# breakpoint");
4379 }
4380 #endif
4381
4382 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4383 __ int3();
4384 }
4385
4386 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4387 return MachNode::size(ra_);
4388 }
4389
4390 %}
4391
4392 //----------ENCODING BLOCK-----------------------------------------------------
4393 // This block specifies the encoding classes used by the compiler to
4394 // output byte streams. Encoding classes are parameterized macros
4395 // used by Machine Instruction Nodes in order to generate the bit
4396 // encoding of the instruction. Operands specify their base encoding
4397 // interface with the interface keyword. Four interfaces are currently
4398 // supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
4399 // COND_INTER. REG_INTER causes an operand to generate a function
4400 // which returns its register number when queried. CONST_INTER causes
4401 // an operand to generate a function which returns the value of the
4402 // constant when queried. MEMORY_INTER causes an operand to generate
4403 // four functions which return the Base Register, the Index Register,
4404 // the Scale Value, and the Offset Value of the operand when queried.
4405 // COND_INTER causes an operand to generate six functions which return
4406 // the encoding code (i.e. the encoding bits for the instruction)
4407 // associated with each basic boolean condition for a conditional
4408 // instruction.
4409 //
4410 // Instructions specify two basic values for encoding. Again, a
4411 // function is available to check if the constant displacement is an
4412 // oop. They use the ins_encode keyword to specify their encoding
4413 // classes (which must be a sequence of enc_class names, and their
4414 // parameters, specified in the encoding block), and they use the
4415 // opcode keyword to specify, in order, their primary, secondary, and
4416 // tertiary opcode. Only the opcode sections which a particular
4417 // instruction needs for encoding need to be specified.
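//
// For example, a hypothetical instruction definition (the names and encoding
// classes below are illustrative only, not taken from this file) might tie
// these pieces together as follows:
//
//   instruct addI_rReg_example(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));        // ideal subtree being replaced
//     effect(KILL cr);                      // clobbers the flags register
//     opcode(0x03);                         // primary opcode only
//     ins_encode(OpcP, reg_reg(dst, src));  // sequence of enc_class names
//     ins_pipe(ialu_reg_reg);
//   %}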
4418 encode %{
4419 enc_class cdql_enc(no_rax_rdx_RegI div)
4420 %{
4421 // Full implementation of Java idiv and irem; checks for
4422 // special case as described in JVM spec., p.243 & p.271.
4423 //
4424 // normal case special case
4425 //
4426 // input : rax: dividend min_int
4427 // reg: divisor -1
4428 //
4429 // output: rax: quotient (= rax idiv reg) min_int
4430 // rdx: remainder (= rax irem reg) 0
4431 //
    // Code sequence:
4433 //
4434 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4435 // 5: 75 07/08 jne e <normal>
4436 // 7: 33 d2 xor %edx,%edx
4437 // [div >= 8 -> offset + 1]
4438 // [REX_B]
4439 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4440 // c: 74 03/04 je 11 <done>
4441 // 000000000000000e <normal>:
4442 // e: 99 cltd
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // f: f7 f9 idiv $div
4446 // 0000000000000011 <done>:
4447 Label normal;
4448 Label done;
4449
4450 // cmp $0x80000000,%eax
4451 __ cmpl(as_Register(RAX_enc), 0x80000000);
4452
4453 // jne e <normal>
4454 __ jccb(Assembler::notEqual, normal);
4455
4456 // xor %edx,%edx
4457 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4458
    // cmp $0xffffffffffffffff,$div
4460 __ cmpl($div$$Register, -1);
4461
4462 // je 11 <done>
4463 __ jccb(Assembler::equal, done);
4464
4465 // <normal>
4466 // cltd
4467 __ bind(normal);
4468 __ cdql();
4469
4470 // idivl
4471 // <done>
4472 __ idivl($div$$Register);
4473 __ bind(done);
4474 %}
4475
4476 enc_class cdqq_enc(no_rax_rdx_RegL div)
4477 %{
4478 // Full implementation of Java ldiv and lrem; checks for
4479 // special case as described in JVM spec., p.243 & p.271.
4480 //
4481 // normal case special case
4482 //
4483 // input : rax: dividend min_long
4484 // reg: divisor -1
4485 //
4486 // output: rax: quotient (= rax idiv reg) min_long
4487 // rdx: remainder (= rax irem reg) 0
4488 //
    // Code sequence:
4490 //
4491 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4492 // 7: 00 00 80
4493 // a: 48 39 d0 cmp %rdx,%rax
4494 // d: 75 08 jne 17 <normal>
4495 // f: 33 d2 xor %edx,%edx
4496 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4497 // 15: 74 05 je 1c <done>
4498 // 0000000000000017 <normal>:
4499 // 17: 48 99 cqto
4500 // 19: 48 f7 f9 idiv $div
4501 // 000000000000001c <done>:
4502 Label normal;
4503 Label done;
4504
4505 // mov $0x8000000000000000,%rdx
4506 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4507
4508 // cmp %rdx,%rax
4509 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4510
4511 // jne 17 <normal>
4512 __ jccb(Assembler::notEqual, normal);
4513
4514 // xor %edx,%edx
4515 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4516
4517 // cmp $0xffffffffffffffff,$div
4518 __ cmpq($div$$Register, -1);
4519
    // je 1c <done>
4521 __ jccb(Assembler::equal, done);
4522
4523 // <normal>
4524 // cqto
4525 __ bind(normal);
4526 __ cdqq();
4527
    // idivq
4529 // <done>
4530 __ idivq($div$$Register);
4531 __ bind(done);
4532 %}
4533
4534 enc_class clear_avx %{
4535 DEBUG_ONLY(int off0 = __ offset());
4536 if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
4540 __ vzeroupper();
4541 }
4542 DEBUG_ONLY(int off1 = __ offset());
4543 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4544 %}
4545
4546 enc_class Java_To_Runtime(method meth) %{
4547 __ lea(r10, RuntimeAddress((address)$meth$$method));
4548 __ call(r10);
4549 __ post_call_nop();
4550 %}
4551
4552 enc_class Java_Static_Call(method meth)
4553 %{
4554 // JAVA STATIC CALL
4555 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4556 // determine who we intended to call.
4557 if (!_method) {
4558 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4559 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4560 // The NOP here is purely to ensure that eliding a call to
4561 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4562 __ addr_nop_5();
4563 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4564 } else {
4565 int method_index = resolved_method_index(masm);
4566 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4567 : static_call_Relocation::spec(method_index);
4568 address mark = __ pc();
4569 int call_offset = __ offset();
4570 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4571 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4572 // Calls of the same statically bound method can share
4573 // a stub to the interpreter.
4574 __ code()->shared_stub_to_interp_for(_method, call_offset);
4575 } else {
4576 // Emit stubs for static call.
4577 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4578 __ clear_inst_mark();
4579 if (stub == nullptr) {
4580 ciEnv::current()->record_failure("CodeCache is full");
4581 return;
4582 }
4583 }
4584 }
4585 __ post_call_nop();
4586 %}
4587
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
4624 // | SP-+--------+----> Matcher::_old_SP, even aligned
4625 // | | in | 3 area for Intel ret address
4626 // Owned by |preserve| Empty on Sparc.
4627 // SELF +--------+
4628 // | | pad2 | 2 pad to align old SP
4629 // | +--------+ 1
4630 // | | locks | 0
4631 // | +--------+----> OptoReg::stack0(), even aligned
4632 // | | pad1 | 11 pad to align new SP
4633 // | +--------+
4634 // | | | 10
4635 // | | spills | 9 spills
4636 // V | | 8 (pad0 slot for callee)
4637 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4638 // ^ | out | 7
4639 // | | args | 6 Holes in outgoing args owned by CALLEE
4640 // Owned by +--------+
4641 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4642 // | new |preserve| Must be even-aligned.
4643 // | SP-+--------+----> Matcher::_new_SP, even aligned
4644 // | | |
4645 //
4646 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4647 // known from SELF's arguments and the Java calling convention.
4648 // Region 6-7 is determined per call site.
4649 // Note 2: If the calling convention leaves holes in the incoming argument
4650 // area, those holes are owned by SELF. Holes in the outgoing area
4651 // are owned by the CALLEE. Holes should not be necessary in the
4652 // incoming area, as the Java calling convention is completely under
4653 // the control of the AD file. Doubles can be sorted and packed to
4654 // avoid holes. Holes in the outgoing arguments may be necessary for
4655 // varargs C calling conventions.
4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4657 // even aligned with pad0 as needed.
4658 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4659 // region 6-11 is even aligned; it may be padded out more so that
4660 // the region from SP to FP meets the minimum stack alignment.
4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4662 // alignment. Region 11, pad1, may be dynamically extended so that
4663 // SP meets the minimum alignment.
4664
4665 frame
4666 %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
4669 inline_cache_reg(RAX); // Inline Cache Register
4670
4671 // Optional: name the operand used by cisc-spilling to access
4672 // [stack_pointer + offset]
4673 cisc_spilling_operand_name(indOffset32);
4674
4675 // Number of stack slots consumed by locking an object
4676 sync_stack_slots(2);
4677
4678 // Compiled code's Frame Pointer
4679 frame_pointer(RSP);
4680
4681 // Interpreter stores its frame pointer in a register which is
4682 // stored to the stack by I2CAdaptors.
4683 // I2CAdaptors convert from interpreted java to compiled java.
4684 interpreter_frame_pointer(RBP);
4685
4686 // Stack alignment requirement
4687 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4688
4689 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4690 // for calls to C. Supports the var-args backing area for register parms.
4691 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4692
4693 // The after-PROLOG location of the return address. Location of
4694 // return address specifies a type (REG or STACK) and a number
4695 // representing the register number (i.e. - use a register name) or
4696 // stack slot.
4697 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and the alignment word.
4699 return_addr(STACK - 2 +
4700 align_up((Compile::current()->in_preserve_stack_slots() +
4701 Compile::current()->fixed_slots()),
4702 stack_alignment_in_slots()));
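
  // For example (a sketch with illustrative values): with 4 in-preserve slots,
  // 1 fixed slot, and a 16-byte (4-slot) stack alignment, align_up(4 + 1, 4)
  // rounds the sum up to 8, placing the return address at stack slot 6.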
4703
4704 // Location of compiled Java return values. Same as C for now.
4705 return_value
4706 %{
4707 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4708 "only return normal values");
4709
4710 static const int lo[Op_RegL + 1] = {
4711 0,
4712 0,
4713 RAX_num, // Op_RegN
4714 RAX_num, // Op_RegI
4715 RAX_num, // Op_RegP
4716 XMM0_num, // Op_RegF
4717 XMM0_num, // Op_RegD
4718 RAX_num // Op_RegL
4719 };
4720 static const int hi[Op_RegL + 1] = {
4721 0,
4722 0,
4723 OptoReg::Bad, // Op_RegN
4724 OptoReg::Bad, // Op_RegI
4725 RAX_H_num, // Op_RegP
4726 OptoReg::Bad, // Op_RegF
4727 XMM0b_num, // Op_RegD
4728 RAX_H_num // Op_RegL
4729 };
4730 // Excluded flags and vector registers.
4731 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4732 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4733 %}
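
  // For instance, querying the tables above with Op_RegD yields
  // OptoRegPair(XMM0b_num, XMM0_num): a double is returned in XMM0.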
4734 %}
4735
4736 //----------ATTRIBUTES---------------------------------------------------------
4737 //----------Operand Attributes-------------------------------------------------
4738 op_attrib op_cost(0); // Required cost attribute
4739
4740 //----------Instruction Attributes---------------------------------------------
4741 ins_attrib ins_cost(100); // Required cost attribute
4742 ins_attrib ins_size(8); // Required size attribute (in bits)
4743 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4744 // a non-matching short branch variant
4745 // of some long branch?
4746 ins_attrib ins_alignment(1); // Required alignment attribute (must
4747 // be a power of 2) specifies the
4748 // alignment that some part of the
4749 // instruction (not necessarily the
4750 // start) requires. If > 1, a
4751 // compute_padding() function must be
4752 // provided for the instruction
4753
4754 // Whether this node is expanded during code emission into a sequence of
4755 // instructions and the first instruction can perform an implicit null check.
4756 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4757
4758 //----------OPERANDS-----------------------------------------------------------
4759 // Operand definitions must precede instruction definitions for correct parsing
4760 // in the ADLC because operands constitute user defined types which are used in
4761 // instruction definitions.
4762
4763 //----------Simple Operands----------------------------------------------------
4764 // Immediate Operands
4765 // Integer Immediate
4766 operand immI()
4767 %{
4768 match(ConI);
4769
4770 op_cost(10);
4771 format %{ %}
4772 interface(CONST_INTER);
4773 %}
4774
4775 // Constant for test vs zero
4776 operand immI_0()
4777 %{
4778 predicate(n->get_int() == 0);
4779 match(ConI);
4780
4781 op_cost(0);
4782 format %{ %}
4783 interface(CONST_INTER);
4784 %}
4785
4786 // Constant for increment
4787 operand immI_1()
4788 %{
4789 predicate(n->get_int() == 1);
4790 match(ConI);
4791
4792 op_cost(0);
4793 format %{ %}
4794 interface(CONST_INTER);
4795 %}
4796
4797 // Constant for decrement
4798 operand immI_M1()
4799 %{
4800 predicate(n->get_int() == -1);
4801 match(ConI);
4802
4803 op_cost(0);
4804 format %{ %}
4805 interface(CONST_INTER);
4806 %}
4807
4808 operand immI_2()
4809 %{
4810 predicate(n->get_int() == 2);
4811 match(ConI);
4812
4813 op_cost(0);
4814 format %{ %}
4815 interface(CONST_INTER);
4816 %}
4817
4818 operand immI_4()
4819 %{
4820 predicate(n->get_int() == 4);
4821 match(ConI);
4822
4823 op_cost(0);
4824 format %{ %}
4825 interface(CONST_INTER);
4826 %}
4827
4828 operand immI_8()
4829 %{
4830 predicate(n->get_int() == 8);
4831 match(ConI);
4832
4833 op_cost(0);
4834 format %{ %}
4835 interface(CONST_INTER);
4836 %}
4837
4838 // Valid scale values for addressing modes
4839 operand immI2()
4840 %{
4841 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4842 match(ConI);
4843
4844 format %{ %}
4845 interface(CONST_INTER);
4846 %}
4847
4848 operand immU7()
4849 %{
4850 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4851 match(ConI);
4852
4853 op_cost(5);
4854 format %{ %}
4855 interface(CONST_INTER);
4856 %}
4857
4858 operand immI8()
4859 %{
4860 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4861 match(ConI);
4862
4863 op_cost(5);
4864 format %{ %}
4865 interface(CONST_INTER);
4866 %}
4867
4868 operand immU8()
4869 %{
4870 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4871 match(ConI);
4872
4873 op_cost(5);
4874 format %{ %}
4875 interface(CONST_INTER);
4876 %}
4877
4878 operand immI16()
4879 %{
4880 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4881 match(ConI);
4882
4883 op_cost(10);
4884 format %{ %}
4885 interface(CONST_INTER);
4886 %}
4887
4888 // Int Immediate non-negative
4889 operand immU31()
4890 %{
4891 predicate(n->get_int() >= 0);
4892 match(ConI);
4893
4894 op_cost(0);
4895 format %{ %}
4896 interface(CONST_INTER);
4897 %}
4898
4899 // Pointer Immediate
4900 operand immP()
4901 %{
4902 match(ConP);
4903
4904 op_cost(10);
4905 format %{ %}
4906 interface(CONST_INTER);
4907 %}
4908
4909 // Null Pointer Immediate
4910 operand immP0()
4911 %{
4912 predicate(n->get_ptr() == 0);
4913 match(ConP);
4914
4915 op_cost(5);
4916 format %{ %}
4917 interface(CONST_INTER);
4918 %}
4919
4920 // Pointer Immediate
4921 operand immN() %{
4922 match(ConN);
4923
4924 op_cost(10);
4925 format %{ %}
4926 interface(CONST_INTER);
4927 %}
4928
4929 operand immNKlass() %{
4930 match(ConNKlass);
4931
4932 op_cost(10);
4933 format %{ %}
4934 interface(CONST_INTER);
4935 %}
4936
4937 // Null Pointer Immediate
4938 operand immN0() %{
4939 predicate(n->get_narrowcon() == 0);
4940 match(ConN);
4941
4942 op_cost(5);
4943 format %{ %}
4944 interface(CONST_INTER);
4945 %}
4946
4947 operand immP31()
4948 %{
4949 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4950 && (n->get_ptr() >> 31) == 0);
4951 match(ConP);
4952
4953 op_cost(5);
4954 format %{ %}
4955 interface(CONST_INTER);
4956 %}
4957
4958
4959 // Long Immediate
4960 operand immL()
4961 %{
4962 match(ConL);
4963
4964 op_cost(20);
4965 format %{ %}
4966 interface(CONST_INTER);
4967 %}
4968
4969 // Long Immediate 8-bit
4970 operand immL8()
4971 %{
4972 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4973 match(ConL);
4974
4975 op_cost(5);
4976 format %{ %}
4977 interface(CONST_INTER);
4978 %}
4979
4980 // Long Immediate 32-bit unsigned
4981 operand immUL32()
4982 %{
4983 predicate(n->get_long() == (unsigned int) (n->get_long()));
4984 match(ConL);
4985
4986 op_cost(10);
4987 format %{ %}
4988 interface(CONST_INTER);
4989 %}
4990
4991 // Long Immediate 32-bit signed
4992 operand immL32()
4993 %{
4994 predicate(n->get_long() == (int) (n->get_long()));
4995 match(ConL);
4996
4997 op_cost(15);
4998 format %{ %}
4999 interface(CONST_INTER);
5000 %}
5001
5002 operand immL_Pow2()
5003 %{
5004 predicate(is_power_of_2((julong)n->get_long()));
5005 match(ConL);
5006
5007 op_cost(15);
5008 format %{ %}
5009 interface(CONST_INTER);
5010 %}
5011
5012 operand immL_NotPow2()
5013 %{
5014 predicate(is_power_of_2((julong)~n->get_long()));
5015 match(ConL);
5016
5017 op_cost(15);
5018 format %{ %}
5019 interface(CONST_INTER);
5020 %}
5021
5022 // Long Immediate zero
5023 operand immL0()
5024 %{
5025 predicate(n->get_long() == 0L);
5026 match(ConL);
5027
5028 op_cost(10);
5029 format %{ %}
5030 interface(CONST_INTER);
5031 %}
5032
5033 // Constant for increment
5034 operand immL1()
5035 %{
5036 predicate(n->get_long() == 1);
5037 match(ConL);
5038
5039 format %{ %}
5040 interface(CONST_INTER);
5041 %}
5042
5043 // Constant for decrement
5044 operand immL_M1()
5045 %{
5046 predicate(n->get_long() == -1);
5047 match(ConL);
5048
5049 format %{ %}
5050 interface(CONST_INTER);
5051 %}
5052
5053 // Long Immediate: low 32-bit mask
5054 operand immL_32bits()
5055 %{
5056 predicate(n->get_long() == 0xFFFFFFFFL);
5057 match(ConL);
5058 op_cost(20);
5059
5060 format %{ %}
5061 interface(CONST_INTER);
5062 %}
5063
5064 // Int Immediate: 2^n-1, positive
5065 operand immI_Pow2M1()
5066 %{
5067 predicate((n->get_int() > 0)
5068 && is_power_of_2((juint)n->get_int() + 1));
5069 match(ConI);
5070
5071 op_cost(20);
5072 format %{ %}
5073 interface(CONST_INTER);
5074 %}
5075
5076 // Float Immediate zero
5077 operand immF0()
5078 %{
5079 predicate(jint_cast(n->getf()) == 0);
5080 match(ConF);
5081
5082 op_cost(5);
5083 format %{ %}
5084 interface(CONST_INTER);
5085 %}
5086
5087 // Float Immediate
5088 operand immF()
5089 %{
5090 match(ConF);
5091
5092 op_cost(15);
5093 format %{ %}
5094 interface(CONST_INTER);
5095 %}
5096
5097 // Half Float Immediate
5098 operand immH()
5099 %{
5100 match(ConH);
5101
5102 op_cost(15);
5103 format %{ %}
5104 interface(CONST_INTER);
5105 %}
5106
5107 // Double Immediate zero
5108 operand immD0()
5109 %{
5110 predicate(jlong_cast(n->getd()) == 0);
5111 match(ConD);
5112
5113 op_cost(5);
5114 format %{ %}
5115 interface(CONST_INTER);
5116 %}
5117
5118 // Double Immediate
5119 operand immD()
5120 %{
5121 match(ConD);
5122
5123 op_cost(15);
5124 format %{ %}
5125 interface(CONST_INTER);
5126 %}
5127
5128 // Immediates for special shifts (sign extend)
5129
// Constants for special shift counts
5131 operand immI_16()
5132 %{
5133 predicate(n->get_int() == 16);
5134 match(ConI);
5135
5136 format %{ %}
5137 interface(CONST_INTER);
5138 %}
5139
5140 operand immI_24()
5141 %{
5142 predicate(n->get_int() == 24);
5143 match(ConI);
5144
5145 format %{ %}
5146 interface(CONST_INTER);
5147 %}
5148
5149 // Constant for byte-wide masking
5150 operand immI_255()
5151 %{
5152 predicate(n->get_int() == 255);
5153 match(ConI);
5154
5155 format %{ %}
5156 interface(CONST_INTER);
5157 %}
5158
5159 // Constant for short-wide masking
5160 operand immI_65535()
5161 %{
5162 predicate(n->get_int() == 65535);
5163 match(ConI);
5164
5165 format %{ %}
5166 interface(CONST_INTER);
5167 %}
5168
5169 // Constant for byte-wide masking
5170 operand immL_255()
5171 %{
5172 predicate(n->get_long() == 255);
5173 match(ConL);
5174
5175 format %{ %}
5176 interface(CONST_INTER);
5177 %}
5178
5179 // Constant for short-wide masking
5180 operand immL_65535()
5181 %{
5182 predicate(n->get_long() == 65535);
5183 match(ConL);
5184
5185 format %{ %}
5186 interface(CONST_INTER);
5187 %}
5188
5189 // AOT Runtime Constants Address
5190 operand immAOTRuntimeConstantsAddress()
5191 %{
5192 // Check if the address is in the range of AOT Runtime Constants
5193 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5194 match(ConP);
5195
5196 op_cost(0);
5197 format %{ %}
5198 interface(CONST_INTER);
5199 %}
5200
5201 operand kReg()
5202 %{
5203 constraint(ALLOC_IN_RC(vectmask_reg));
5204 match(RegVectMask);
5205 format %{%}
5206 interface(REG_INTER);
5207 %}
5208
5209 // Register Operands
5210 // Integer Register
5211 operand rRegI()
5212 %{
5213 constraint(ALLOC_IN_RC(int_reg));
5214 match(RegI);
5215
5216 match(rax_RegI);
5217 match(rbx_RegI);
5218 match(rcx_RegI);
5219 match(rdx_RegI);
5220 match(rdi_RegI);
5221
5222 format %{ %}
5223 interface(REG_INTER);
5224 %}
5225
5226 // Special Registers
5227 operand rax_RegI()
5228 %{
5229 constraint(ALLOC_IN_RC(int_rax_reg));
5230 match(RegI);
5231 match(rRegI);
5232
5233 format %{ "RAX" %}
5234 interface(REG_INTER);
5235 %}
5236
5237 // Special Registers
5238 operand rbx_RegI()
5239 %{
5240 constraint(ALLOC_IN_RC(int_rbx_reg));
5241 match(RegI);
5242 match(rRegI);
5243
5244 format %{ "RBX" %}
5245 interface(REG_INTER);
5246 %}
5247
5248 operand rcx_RegI()
5249 %{
5250 constraint(ALLOC_IN_RC(int_rcx_reg));
5251 match(RegI);
5252 match(rRegI);
5253
5254 format %{ "RCX" %}
5255 interface(REG_INTER);
5256 %}
5257
5258 operand rdx_RegI()
5259 %{
5260 constraint(ALLOC_IN_RC(int_rdx_reg));
5261 match(RegI);
5262 match(rRegI);
5263
5264 format %{ "RDX" %}
5265 interface(REG_INTER);
5266 %}
5267
5268 operand rdi_RegI()
5269 %{
5270 constraint(ALLOC_IN_RC(int_rdi_reg));
5271 match(RegI);
5272 match(rRegI);
5273
5274 format %{ "RDI" %}
5275 interface(REG_INTER);
5276 %}
5277
5278 operand no_rax_rdx_RegI()
5279 %{
5280 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5281 match(RegI);
5282 match(rbx_RegI);
5283 match(rcx_RegI);
5284 match(rdi_RegI);
5285
5286 format %{ %}
5287 interface(REG_INTER);
5288 %}
5289
5290 operand no_rbp_r13_RegI()
5291 %{
5292 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5293 match(RegI);
5294 match(rRegI);
5295 match(rax_RegI);
5296 match(rbx_RegI);
5297 match(rcx_RegI);
5298 match(rdx_RegI);
5299 match(rdi_RegI);
5300
5301 format %{ %}
5302 interface(REG_INTER);
5303 %}
5304
5305 // Pointer Register
5306 operand any_RegP()
5307 %{
5308 constraint(ALLOC_IN_RC(any_reg));
5309 match(RegP);
5310 match(rax_RegP);
5311 match(rbx_RegP);
5312 match(rdi_RegP);
5313 match(rsi_RegP);
5314 match(rbp_RegP);
5315 match(r15_RegP);
5316 match(rRegP);
5317
5318 format %{ %}
5319 interface(REG_INTER);
5320 %}
5321
5322 operand rRegP()
5323 %{
5324 constraint(ALLOC_IN_RC(ptr_reg));
5325 match(RegP);
5326 match(rax_RegP);
5327 match(rbx_RegP);
5328 match(rdi_RegP);
5329 match(rsi_RegP);
5330 match(rbp_RegP); // See Q&A below about
5331 match(r15_RegP); // r15_RegP and rbp_RegP.
5332
5333 format %{ %}
5334 interface(REG_INTER);
5335 %}
5336
5337 operand rRegN() %{
5338 constraint(ALLOC_IN_RC(int_reg));
5339 match(RegN);
5340
5341 format %{ %}
5342 interface(REG_INTER);
5343 %}
5344
5345 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5346 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
5348 // The output of an instruction is controlled by the allocator, which respects
5349 // register class masks, not match rules. Unless an instruction mentions
5350 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5351 // by the allocator as an input.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
5355
5356 // This operand is not allowed to use RBP even if
5357 // RBP is not used to hold the frame pointer.
5358 operand no_rbp_RegP()
5359 %{
5360 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5361 match(RegP);
5362 match(rbx_RegP);
5363 match(rsi_RegP);
5364 match(rdi_RegP);
5365
5366 format %{ %}
5367 interface(REG_INTER);
5368 %}
5369
5370 // Special Registers
5371 // Return a pointer value
5372 operand rax_RegP()
5373 %{
5374 constraint(ALLOC_IN_RC(ptr_rax_reg));
5375 match(RegP);
5376 match(rRegP);
5377
5378 format %{ %}
5379 interface(REG_INTER);
5380 %}
5381
5382 // Special Registers
5383 // Return a compressed pointer value
5384 operand rax_RegN()
5385 %{
5386 constraint(ALLOC_IN_RC(int_rax_reg));
5387 match(RegN);
5388 match(rRegN);
5389
5390 format %{ %}
5391 interface(REG_INTER);
5392 %}
5393
5394 // Used in AtomicAdd
5395 operand rbx_RegP()
5396 %{
5397 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5398 match(RegP);
5399 match(rRegP);
5400
5401 format %{ %}
5402 interface(REG_INTER);
5403 %}
5404
5405 operand rsi_RegP()
5406 %{
5407 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5408 match(RegP);
5409 match(rRegP);
5410
5411 format %{ %}
5412 interface(REG_INTER);
5413 %}
5414
5415 operand rbp_RegP()
5416 %{
5417 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5418 match(RegP);
5419 match(rRegP);
5420
5421 format %{ %}
5422 interface(REG_INTER);
5423 %}
5424
5425 // Used in rep stosq
5426 operand rdi_RegP()
5427 %{
5428 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5429 match(RegP);
5430 match(rRegP);
5431
5432 format %{ %}
5433 interface(REG_INTER);
5434 %}
5435
5436 operand r15_RegP()
5437 %{
5438 constraint(ALLOC_IN_RC(ptr_r15_reg));
5439 match(RegP);
5440 match(rRegP);
5441
5442 format %{ %}
5443 interface(REG_INTER);
5444 %}
5445
5446 operand rRegL()
5447 %{
5448 constraint(ALLOC_IN_RC(long_reg));
5449 match(RegL);
5450 match(rax_RegL);
5451 match(rdx_RegL);
5452
5453 format %{ %}
5454 interface(REG_INTER);
5455 %}
5456
5457 // Special Registers
5458 operand no_rax_rdx_RegL()
5459 %{
5460 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5461 match(RegL);
5462 match(rRegL);
5463
5464 format %{ %}
5465 interface(REG_INTER);
5466 %}
5467
5468 operand rax_RegL()
5469 %{
5470 constraint(ALLOC_IN_RC(long_rax_reg));
5471 match(RegL);
5472 match(rRegL);
5473
5474 format %{ "RAX" %}
5475 interface(REG_INTER);
5476 %}
5477
5478 operand rcx_RegL()
5479 %{
5480 constraint(ALLOC_IN_RC(long_rcx_reg));
5481 match(RegL);
5482 match(rRegL);
5483
5484 format %{ %}
5485 interface(REG_INTER);
5486 %}
5487
5488 operand rdx_RegL()
5489 %{
5490 constraint(ALLOC_IN_RC(long_rdx_reg));
5491 match(RegL);
5492 match(rRegL);
5493
5494 format %{ %}
5495 interface(REG_INTER);
5496 %}
5497
5498 operand r11_RegL()
5499 %{
5500 constraint(ALLOC_IN_RC(long_r11_reg));
5501 match(RegL);
5502 match(rRegL);
5503
5504 format %{ %}
5505 interface(REG_INTER);
5506 %}
5507
5508 operand no_rbp_r13_RegL()
5509 %{
5510 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5511 match(RegL);
5512 match(rRegL);
5513 match(rax_RegL);
5514 match(rcx_RegL);
5515 match(rdx_RegL);
5516
5517 format %{ %}
5518 interface(REG_INTER);
5519 %}
5520
5521 // Flags register, used as output of compare instructions
5522 operand rFlagsReg()
5523 %{
5524 constraint(ALLOC_IN_RC(int_flags));
5525 match(RegFlags);
5526
5527 format %{ "RFLAGS" %}
5528 interface(REG_INTER);
5529 %}
5530
5531 // Flags register, used as output of FLOATING POINT compare instructions
5532 operand rFlagsRegU()
5533 %{
5534 constraint(ALLOC_IN_RC(int_flags));
5535 match(RegFlags);
5536
5537 format %{ "RFLAGS_U" %}
5538 interface(REG_INTER);
5539 %}
5540
5541 operand rFlagsRegUCF() %{
5542 constraint(ALLOC_IN_RC(int_flags));
5543 match(RegFlags);
5544 predicate(false);
5545
5546 format %{ "RFLAGS_U_CF" %}
5547 interface(REG_INTER);
5548 %}
5549
5550 // Float register operands
5551 operand regF() %{
5552 constraint(ALLOC_IN_RC(float_reg));
5553 match(RegF);
5554
5555 format %{ %}
5556 interface(REG_INTER);
5557 %}
5558
5559 // Float register operands
5560 operand legRegF() %{
5561 constraint(ALLOC_IN_RC(float_reg_legacy));
5562 match(RegF);
5563
5564 format %{ %}
5565 interface(REG_INTER);
5566 %}
5567
5568 // Float register operands
5569 operand vlRegF() %{
5570 constraint(ALLOC_IN_RC(float_reg_vl));
5571 match(RegF);
5572
5573 format %{ %}
5574 interface(REG_INTER);
5575 %}
5576
5577 // Double register operands
5578 operand regD() %{
5579 constraint(ALLOC_IN_RC(double_reg));
5580 match(RegD);
5581
5582 format %{ %}
5583 interface(REG_INTER);
5584 %}
5585
5586 // Double register operands
5587 operand legRegD() %{
5588 constraint(ALLOC_IN_RC(double_reg_legacy));
5589 match(RegD);
5590
5591 format %{ %}
5592 interface(REG_INTER);
5593 %}
5594
5595 // Double register operands
5596 operand vlRegD() %{
5597 constraint(ALLOC_IN_RC(double_reg_vl));
5598 match(RegD);
5599
5600 format %{ %}
5601 interface(REG_INTER);
5602 %}
5603
5604 //----------Memory Operands----------------------------------------------------
5605 // Direct Memory Operand
5606 // operand direct(immP addr)
5607 // %{
5608 // match(addr);
5609
5610 // format %{ "[$addr]" %}
5611 // interface(MEMORY_INTER) %{
5612 // base(0xFFFFFFFF);
5613 // index(0x4);
5614 // scale(0x0);
5615 // disp($addr);
5616 // %}
5617 // %}
5618
5619 // Indirect Memory Operand
5620 operand indirect(any_RegP reg)
5621 %{
5622 constraint(ALLOC_IN_RC(ptr_reg));
5623 match(reg);
5624
5625 format %{ "[$reg]" %}
5626 interface(MEMORY_INTER) %{
5627 base($reg);
5628 index(0x4);
5629 scale(0x0);
5630 disp(0x0);
5631 %}
5632 %}
5633
5634 // Indirect Memory Plus Short Offset Operand
5635 operand indOffset8(any_RegP reg, immL8 off)
5636 %{
5637 constraint(ALLOC_IN_RC(ptr_reg));
5638 match(AddP reg off);
5639
5640 format %{ "[$reg + $off (8-bit)]" %}
5641 interface(MEMORY_INTER) %{
5642 base($reg);
5643 index(0x4);
5644 scale(0x0);
5645 disp($off);
5646 %}
5647 %}
5648
5649 // Indirect Memory Plus Long Offset Operand
5650 operand indOffset32(any_RegP reg, immL32 off)
5651 %{
5652 constraint(ALLOC_IN_RC(ptr_reg));
5653 match(AddP reg off);
5654
5655 format %{ "[$reg + $off (32-bit)]" %}
5656 interface(MEMORY_INTER) %{
5657 base($reg);
5658 index(0x4);
5659 scale(0x0);
5660 disp($off);
5661 %}
5662 %}
5663
5664 // Indirect Memory Plus Index Register Plus Offset Operand
5665 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5666 %{
5667 constraint(ALLOC_IN_RC(ptr_reg));
5668 match(AddP (AddP reg lreg) off);
5669
5670 op_cost(10);
5671 format %{"[$reg + $off + $lreg]" %}
5672 interface(MEMORY_INTER) %{
5673 base($reg);
5674 index($lreg);
5675 scale(0x0);
5676 disp($off);
5677 %}
5678 %}
5679
5680 // Indirect Memory Plus Index Register Plus Offset Operand
5681 operand indIndex(any_RegP reg, rRegL lreg)
5682 %{
5683 constraint(ALLOC_IN_RC(ptr_reg));
5684 match(AddP reg lreg);
5685
5686 op_cost(10);
5687 format %{"[$reg + $lreg]" %}
5688 interface(MEMORY_INTER) %{
5689 base($reg);
5690 index($lreg);
5691 scale(0x0);
5692 disp(0x0);
5693 %}
5694 %}
5695
5696 // Indirect Memory Times Scale Plus Index Register
5697 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5698 %{
5699 constraint(ALLOC_IN_RC(ptr_reg));
5700 match(AddP reg (LShiftL lreg scale));
5701
5702 op_cost(10);
5703 format %{"[$reg + $lreg << $scale]" %}
5704 interface(MEMORY_INTER) %{
5705 base($reg);
5706 index($lreg);
5707 scale($scale);
5708 disp(0x0);
5709 %}
5710 %}
5711
5712 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5713 %{
5714 constraint(ALLOC_IN_RC(ptr_reg));
5715 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5716 match(AddP reg (LShiftL (ConvI2L idx) scale));
5717
5718 op_cost(10);
5719 format %{"[$reg + pos $idx << $scale]" %}
5720 interface(MEMORY_INTER) %{
5721 base($reg);
5722 index($idx);
5723 scale($scale);
5724 disp(0x0);
5725 %}
5726 %}
5727
5728 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5729 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5730 %{
5731 constraint(ALLOC_IN_RC(ptr_reg));
5732 match(AddP (AddP reg (LShiftL lreg scale)) off);
5733
5734 op_cost(10);
5735 format %{"[$reg + $off + $lreg << $scale]" %}
5736 interface(MEMORY_INTER) %{
5737 base($reg);
5738 index($lreg);
5739 scale($scale);
5740 disp($off);
5741 %}
5742 %}
5743
5744 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5745 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5746 %{
5747 constraint(ALLOC_IN_RC(ptr_reg));
5748 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5749 match(AddP (AddP reg (ConvI2L idx)) off);
5750
5751 op_cost(10);
5752 format %{"[$reg + $off + $idx]" %}
5753 interface(MEMORY_INTER) %{
5754 base($reg);
5755 index($idx);
5756 scale(0x0);
5757 disp($off);
5758 %}
5759 %}
5760
5761 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5762 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5763 %{
5764 constraint(ALLOC_IN_RC(ptr_reg));
5765 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5766 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5767
5768 op_cost(10);
5769 format %{"[$reg + $off + $idx << $scale]" %}
5770 interface(MEMORY_INTER) %{
5771 base($reg);
5772 index($idx);
5773 scale($scale);
5774 disp($off);
5775 %}
5776 %}
5777
5778 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
5781 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5782 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5783 constraint(ALLOC_IN_RC(ptr_reg));
5784 match(AddP (DecodeN reg) off);
5785
5786 op_cost(10);
5787 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5788 interface(MEMORY_INTER) %{
5789 base(0xc); // R12
5790 index($reg);
5791 scale(0x3);
5792 disp($off);
5793 %}
5794 %}
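
// For example (illustrative): with CompressedOops::shift() == 3, a narrow oop
// in $reg plus a 32-bit offset decodes to the effective address
//   heap_base + ($reg << 3) + $off,
// where R12 permanently holds the compressed-oop heap base.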
5795
5796 // Indirect Memory Operand
5797 operand indirectNarrow(rRegN reg)
5798 %{
5799 predicate(CompressedOops::shift() == 0);
5800 constraint(ALLOC_IN_RC(ptr_reg));
5801 match(DecodeN reg);
5802
5803 format %{ "[$reg]" %}
5804 interface(MEMORY_INTER) %{
5805 base($reg);
5806 index(0x4);
5807 scale(0x0);
5808 disp(0x0);
5809 %}
5810 %}
5811
5812 // Indirect Memory Plus Short Offset Operand
5813 operand indOffset8Narrow(rRegN reg, immL8 off)
5814 %{
5815 predicate(CompressedOops::shift() == 0);
5816 constraint(ALLOC_IN_RC(ptr_reg));
5817 match(AddP (DecodeN reg) off);
5818
5819 format %{ "[$reg + $off (8-bit)]" %}
5820 interface(MEMORY_INTER) %{
5821 base($reg);
5822 index(0x4);
5823 scale(0x0);
5824 disp($off);
5825 %}
5826 %}
5827
5828 // Indirect Memory Plus Long Offset Operand
5829 operand indOffset32Narrow(rRegN reg, immL32 off)
5830 %{
5831 predicate(CompressedOops::shift() == 0);
5832 constraint(ALLOC_IN_RC(ptr_reg));
5833 match(AddP (DecodeN reg) off);
5834
5835 format %{ "[$reg + $off (32-bit)]" %}
5836 interface(MEMORY_INTER) %{
5837 base($reg);
5838 index(0x4);
5839 scale(0x0);
5840 disp($off);
5841 %}
5842 %}
5843
5844 // Indirect Memory Plus Index Register Plus Offset Operand
5845 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5846 %{
5847 predicate(CompressedOops::shift() == 0);
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 match(AddP (AddP (DecodeN reg) lreg) off);
5850
5851 op_cost(10);
5852 format %{"[$reg + $off + $lreg]" %}
5853 interface(MEMORY_INTER) %{
5854 base($reg);
5855 index($lreg);
5856 scale(0x0);
5857 disp($off);
5858 %}
5859 %}
5860
5861 // Indirect Memory Plus Index Register Plus Offset Operand
5862 operand indIndexNarrow(rRegN reg, rRegL lreg)
5863 %{
5864 predicate(CompressedOops::shift() == 0);
5865 constraint(ALLOC_IN_RC(ptr_reg));
5866 match(AddP (DecodeN reg) lreg);
5867
5868 op_cost(10);
5869 format %{"[$reg + $lreg]" %}
5870 interface(MEMORY_INTER) %{
5871 base($reg);
5872 index($lreg);
5873 scale(0x0);
5874 disp(0x0);
5875 %}
5876 %}
5877
5878 // Indirect Memory Times Scale Plus Index Register
5879 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5880 %{
5881 predicate(CompressedOops::shift() == 0);
5882 constraint(ALLOC_IN_RC(ptr_reg));
5883 match(AddP (DecodeN reg) (LShiftL lreg scale));
5884
5885 op_cost(10);
5886 format %{"[$reg + $lreg << $scale]" %}
5887 interface(MEMORY_INTER) %{
5888 base($reg);
5889 index($lreg);
5890 scale($scale);
5891 disp(0x0);
5892 %}
5893 %}
5894
5895 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5896 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5897 %{
5898 predicate(CompressedOops::shift() == 0);
5899 constraint(ALLOC_IN_RC(ptr_reg));
5900 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5901
5902 op_cost(10);
5903 format %{"[$reg + $off + $lreg << $scale]" %}
5904 interface(MEMORY_INTER) %{
5905 base($reg);
5906 index($lreg);
5907 scale($scale);
5908 disp($off);
5909 %}
5910 %}
5911
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5913 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5914 %{
5915 constraint(ALLOC_IN_RC(ptr_reg));
5916 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5917 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5918
5919 op_cost(10);
5920 format %{"[$reg + $off + $idx]" %}
5921 interface(MEMORY_INTER) %{
5922 base($reg);
5923 index($idx);
5924 scale(0x0);
5925 disp($off);
5926 %}
5927 %}
5928
5929 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5930 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5931 %{
5932 constraint(ALLOC_IN_RC(ptr_reg));
5933 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5934 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5935
5936 op_cost(10);
5937 format %{"[$reg + $off + $idx << $scale]" %}
5938 interface(MEMORY_INTER) %{
5939 base($reg);
5940 index($idx);
5941 scale($scale);
5942 disp($off);
5943 %}
5944 %}
5945
5946 //----------Special Memory Operands--------------------------------------------
5947 // Stack Slot Operand - This operand is used for loading and storing temporary
5948 // values on the stack where a match requires a value to
5949 // flow through memory.
5950 operand stackSlotP(sRegP reg)
5951 %{
5952 constraint(ALLOC_IN_RC(stack_slots));
5953 // No match rule because this operand is only generated in matching
5954
5955 format %{ "[$reg]" %}
5956 interface(MEMORY_INTER) %{
5957 base(0x4); // RSP
5958 index(0x4); // No Index
5959 scale(0x0); // No Scale
5960 disp($reg); // Stack Offset
5961 %}
5962 %}
5963
5964 operand stackSlotI(sRegI reg)
5965 %{
5966 constraint(ALLOC_IN_RC(stack_slots));
5967 // No match rule because this operand is only generated in matching
5968
5969 format %{ "[$reg]" %}
5970 interface(MEMORY_INTER) %{
5971 base(0x4); // RSP
5972 index(0x4); // No Index
5973 scale(0x0); // No Scale
5974 disp($reg); // Stack Offset
5975 %}
5976 %}
5977
5978 operand stackSlotF(sRegF reg)
5979 %{
5980 constraint(ALLOC_IN_RC(stack_slots));
5981 // No match rule because this operand is only generated in matching
5982
5983 format %{ "[$reg]" %}
5984 interface(MEMORY_INTER) %{
5985 base(0x4); // RSP
5986 index(0x4); // No Index
5987 scale(0x0); // No Scale
5988 disp($reg); // Stack Offset
5989 %}
5990 %}
5991
5992 operand stackSlotD(sRegD reg)
5993 %{
5994 constraint(ALLOC_IN_RC(stack_slots));
5995 // No match rule because this operand is only generated in matching
5996
5997 format %{ "[$reg]" %}
5998 interface(MEMORY_INTER) %{
5999 base(0x4); // RSP
6000 index(0x4); // No Index
6001 scale(0x0); // No Scale
6002 disp($reg); // Stack Offset
6003 %}
6004 %}
6005 operand stackSlotL(sRegL reg)
6006 %{
6007 constraint(ALLOC_IN_RC(stack_slots));
6008 // No match rule because this operand is only generated in matching
6009
6010 format %{ "[$reg]" %}
6011 interface(MEMORY_INTER) %{
6012 base(0x4); // RSP
6013 index(0x4); // No Index
6014 scale(0x0); // No Scale
6015 disp($reg); // Stack Offset
6016 %}
6017 %}
6018
6019 //----------Conditional Branch Operands----------------------------------------
6020 // Comparison Op - This is the operation of the comparison, and is limited to
6021 // the following set of codes:
6022 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6023 //
6024 // Other attributes of the comparison, such as unsignedness, are specified
6025 // by the comparison instruction that sets a condition code flags register.
6026 // That result is represented by a flags operand whose subtype is appropriate
6027 // to the unsignedness (etc.) of the comparison.
6028 //
6029 // Later, the instruction which matches both the Comparison Op (a Bool) and
6030 // the flags (produced by the Cmp) specifies the coding of the comparison op
6031 // by matching a specific subtype of Bool operand below, such as cmpOpU.
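//
// For example, a signed less-than Bool matched through cmpOp below encodes as
// 0xC ("l"), while the same test on an unsigned comparison, matched through
// cmpOpU, encodes as 0x2 ("b").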
6032
6033 // Comparison Code
6034 operand cmpOp()
6035 %{
6036 match(Bool);
6037
6038 format %{ "" %}
6039 interface(COND_INTER) %{
6040 equal(0x4, "e");
6041 not_equal(0x5, "ne");
6042 less(0xC, "l");
6043 greater_equal(0xD, "ge");
6044 less_equal(0xE, "le");
6045 greater(0xF, "g");
6046 overflow(0x0, "o");
6047 no_overflow(0x1, "no");
6048 %}
6049 %}
6050
6051 // Comparison Code, unsigned compare. Used by FP also, with
6052 // C2 (unordered) turned into GT or LT already. The other bits
6053 // C0 and C3 are turned into Carry & Zero flags.
6054 operand cmpOpU()
6055 %{
6056 match(Bool);
6057
6058 format %{ "" %}
6059 interface(COND_INTER) %{
6060 equal(0x4, "e");
6061 not_equal(0x5, "ne");
6062 less(0x2, "b");
6063 greater_equal(0x3, "ae");
6064 less_equal(0x6, "be");
6065 greater(0x7, "a");
6066 overflow(0x0, "o");
6067 no_overflow(0x1, "no");
6068 %}
6069 %}
6070
6071
// Floating-point comparisons that don't require any fixup for the unordered
// case. If both inputs of the comparison are the same, ZF is always set, so
// we don't need to use cmpOpUCF2 for eq/ne.
6075 operand cmpOpUCF() %{
6076 match(Bool);
6077 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6078 n->as_Bool()->_test._test == BoolTest::ge ||
6079 n->as_Bool()->_test._test == BoolTest::le ||
6080 n->as_Bool()->_test._test == BoolTest::gt ||
6081 n->in(1)->in(1) == n->in(1)->in(2));
6082 format %{ "" %}
6083 interface(COND_INTER) %{
6084 equal(0xb, "np");
6085 not_equal(0xa, "p");
6086 less(0x2, "b");
6087 greater_equal(0x3, "ae");
6088 less_equal(0x6, "be");
6089 greater(0x7, "a");
6090 overflow(0x0, "o");
6091 no_overflow(0x1, "no");
6092 %}
6093 %}
6094
6095
6096 // Floating comparisons that can be fixed up with extra conditional jumps
6097 operand cmpOpUCF2() %{
6098 match(Bool);
6099 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6100 n->as_Bool()->_test._test == BoolTest::eq) &&
6101 n->in(1)->in(1) != n->in(1)->in(2));
6102 format %{ "" %}
6103 interface(COND_INTER) %{
6104 equal(0x4, "e");
6105 not_equal(0x5, "ne");
6106 less(0x2, "b");
6107 greater_equal(0x3, "ae");
6108 less_equal(0x6, "be");
6109 greater(0x7, "a");
6110 overflow(0x0, "o");
6111 no_overflow(0x1, "no");
6112 %}
6113 %}
6114
6115 // Operands for bound floating pointer register arguments
6116 operand rxmm0() %{
6117 constraint(ALLOC_IN_RC(xmm0_reg));
6118 match(VecX);
6119 format%{%}
6120 interface(REG_INTER);
6121 %}
6122
6123 // Vectors
6124
6125 // Dummy generic vector class. Should be used for all vector operands.
6126 // Replaced with vec[SDXYZ] during post-selection pass.
6127 operand vec() %{
6128 constraint(ALLOC_IN_RC(dynamic));
6129 match(VecX);
6130 match(VecY);
6131 match(VecZ);
6132 match(VecS);
6133 match(VecD);
6134
6135 format %{ %}
6136 interface(REG_INTER);
6137 %}
6138
6139 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6140 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6141 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6142 // runtime code generation via reg_class_dynamic.
6143 operand legVec() %{
6144 constraint(ALLOC_IN_RC(dynamic));
6145 match(VecX);
6146 match(VecY);
6147 match(VecZ);
6148 match(VecS);
6149 match(VecD);
6150
6151 format %{ %}
6152 interface(REG_INTER);
6153 %}
6154
6155 // Replaces vec during post-selection cleanup. See above.
6156 operand vecS() %{
6157 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6158 match(VecS);
6159
6160 format %{ %}
6161 interface(REG_INTER);
6162 %}
6163
6164 // Replaces legVec during post-selection cleanup. See above.
6165 operand legVecS() %{
6166 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6167 match(VecS);
6168
6169 format %{ %}
6170 interface(REG_INTER);
6171 %}
6172
6173 // Replaces vec during post-selection cleanup. See above.
6174 operand vecD() %{
6175 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6176 match(VecD);
6177
6178 format %{ %}
6179 interface(REG_INTER);
6180 %}
6181
6182 // Replaces legVec during post-selection cleanup. See above.
6183 operand legVecD() %{
6184 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6185 match(VecD);
6186
6187 format %{ %}
6188 interface(REG_INTER);
6189 %}
6190
6191 // Replaces vec during post-selection cleanup. See above.
6192 operand vecX() %{
6193 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6194 match(VecX);
6195
6196 format %{ %}
6197 interface(REG_INTER);
6198 %}
6199
6200 // Replaces legVec during post-selection cleanup. See above.
6201 operand legVecX() %{
6202 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6203 match(VecX);
6204
6205 format %{ %}
6206 interface(REG_INTER);
6207 %}
6208
6209 // Replaces vec during post-selection cleanup. See above.
6210 operand vecY() %{
6211 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6212 match(VecY);
6213
6214 format %{ %}
6215 interface(REG_INTER);
6216 %}
6217
6218 // Replaces legVec during post-selection cleanup. See above.
6219 operand legVecY() %{
6220 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6221 match(VecY);
6222
6223 format %{ %}
6224 interface(REG_INTER);
6225 %}
6226
6227 // Replaces vec during post-selection cleanup. See above.
6228 operand vecZ() %{
6229 constraint(ALLOC_IN_RC(vectorz_reg));
6230 match(VecZ);
6231
6232 format %{ %}
6233 interface(REG_INTER);
6234 %}
6235
6236 // Replaces legVec during post-selection cleanup. See above.
6237 operand legVecZ() %{
6238 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6239 match(VecZ);
6240
6241 format %{ %}
6242 interface(REG_INTER);
6243 %}
6244
6245 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6247 // instruction definitions by not requiring the AD writer to specify separate
6248 // instructions for every form of operand when the instruction accepts
6249 // multiple operand types with the same basic encoding and format. The classic
6250 // case of this is memory operands.
6251
6252 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6253 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6254 indCompressedOopOffset,
6255 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6256 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6257 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6258
6259 //----------PIPELINE-----------------------------------------------------------
6260 // Rules which define the behavior of the target architectures pipeline.
6261 pipeline %{
6262
6263 //----------ATTRIBUTES---------------------------------------------------------
6264 attributes %{
  variable_size_instructions;        // Instructions are of variable size
6266 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
6268 instruction_fetch_unit_size = 16; // The processor fetches one line
6269 instruction_fetch_units = 1; // of 16 bytes
6270 %}
6271
6272 //----------RESOURCES----------------------------------------------------------
6273 // Resources are the functional units available to the machine
6274
6275 // Generic P2/P3 pipeline
6276 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6277 // 3 instructions decoded per cycle.
6278 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6280 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6281 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6282 BR, FPU,
6283 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6284
6285 //----------PIPELINE DESCRIPTION-----------------------------------------------
6286 // Pipeline Description specifies the stages in the machine's pipeline
6287
6288 // Generic P2/P3 pipeline
6289 pipe_desc(S0, S1, S2, S3, S4, S5);
6290
6291 //----------PIPELINE CLASSES---------------------------------------------------
6292 // Pipeline Classes describe the stages in which input and output are
6293 // referenced by the hardware pipeline.
6294
6295 // Naming convention: ialu or fpu
6296 // Then: _reg
6297 // Then: _reg if there is a 2nd register
6298 // Then: _long if it's a pair of instructions implementing a long
6299 // Then: _fat if it requires the big decoder
6300 // Or: _mem if it requires the big decoder and a memory unit.
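//
// For example, ialu_reg_mem below names an integer ALU operation with a
// register destination and a memory source; per this convention it needs the
// big decoder (D0) and a memory unit in addition to an ALU.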
6301
6302 // Integer ALU reg operation
6303 pipe_class ialu_reg(rRegI dst)
6304 %{
6305 single_instruction;
6306 dst : S4(write);
6307 dst : S3(read);
6308 DECODE : S0; // any decoder
6309 ALU : S3; // any alu
6310 %}
6311
6312 // Long ALU reg operation
6313 pipe_class ialu_reg_long(rRegL dst)
6314 %{
6315 instruction_count(2);
6316 dst : S4(write);
6317 dst : S3(read);
6318 DECODE : S0(2); // any 2 decoders
6319 ALU : S3(2); // both alus
6320 %}
6321
6322 // Integer ALU reg operation using big decoder
6323 pipe_class ialu_reg_fat(rRegI dst)
6324 %{
6325 single_instruction;
6326 dst : S4(write);
6327 dst : S3(read);
6328 D0 : S0; // big decoder only
6329 ALU : S3; // any alu
6330 %}
6331
6332 // Integer ALU reg-reg operation
6333 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6334 %{
6335 single_instruction;
6336 dst : S4(write);
6337 src : S3(read);
6338 DECODE : S0; // any decoder
6339 ALU : S3; // any alu
6340 %}
6341
6342 // Integer ALU reg-reg operation
6343 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6344 %{
6345 single_instruction;
6346 dst : S4(write);
6347 src : S3(read);
6348 D0 : S0; // big decoder only
6349 ALU : S3; // any alu
6350 %}
6351
6352 // Integer ALU reg-mem operation
6353 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6354 %{
6355 single_instruction;
6356 dst : S5(write);
6357 mem : S3(read);
6358 D0 : S0; // big decoder only
6359 ALU : S4; // any alu
6360 MEM : S3; // any mem
6361 %}
6362
6363 // Integer mem operation (prefetch)
6364 pipe_class ialu_mem(memory mem)
6365 %{
6366 single_instruction;
6367 mem : S3(read);
6368 D0 : S0; // big decoder only
6369 MEM : S3; // any mem
6370 %}
6371
6372 // Integer Store to Memory
6373 pipe_class ialu_mem_reg(memory mem, rRegI src)
6374 %{
6375 single_instruction;
6376 mem : S3(read);
6377 src : S5(read);
6378 D0 : S0; // big decoder only
6379 ALU : S4; // any alu
6380 MEM : S3;
6381 %}
6382
6383 // // Long Store to Memory
6384 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6385 // %{
6386 // instruction_count(2);
6387 // mem : S3(read);
6388 // src : S5(read);
6389 // D0 : S0(2); // big decoder only; twice
6390 // ALU : S4(2); // any 2 alus
6391 // MEM : S3(2); // Both mems
6392 // %}
6393
6394 // Integer Store to Memory
6395 pipe_class ialu_mem_imm(memory mem)
6396 %{
6397 single_instruction;
6398 mem : S3(read);
6399 D0 : S0; // big decoder only
6400 ALU : S4; // any alu
6401 MEM : S3;
6402 %}
6403
6404 // Integer ALU0 reg-reg operation
6405 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6406 %{
6407 single_instruction;
6408 dst : S4(write);
6409 src : S3(read);
6410 D0 : S0; // Big decoder only
6411 ALU0 : S3; // only alu0
6412 %}
6413
6414 // Integer ALU0 reg-mem operation
6415 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6416 %{
6417 single_instruction;
6418 dst : S5(write);
6419 mem : S3(read);
6420 D0 : S0; // big decoder only
6421 ALU0 : S4; // ALU0 only
6422 MEM : S3; // any mem
6423 %}
6424
6425 // Integer ALU reg-reg operation
6426 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6427 %{
6428 single_instruction;
6429 cr : S4(write);
6430 src1 : S3(read);
6431 src2 : S3(read);
6432 DECODE : S0; // any decoder
6433 ALU : S3; // any alu
6434 %}
6435
6436 // Integer ALU reg-imm operation
6437 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6438 %{
6439 single_instruction;
6440 cr : S4(write);
6441 src1 : S3(read);
6442 DECODE : S0; // any decoder
6443 ALU : S3; // any alu
6444 %}
6445
6446 // Integer ALU reg-mem operation
6447 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6448 %{
6449 single_instruction;
6450 cr : S4(write);
6451 src1 : S3(read);
6452 src2 : S3(read);
6453 D0 : S0; // big decoder only
6454 ALU : S4; // any alu
6455 MEM : S3;
6456 %}
6457
6458 // Conditional move reg-reg
6459 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6460 %{
6461 instruction_count(4);
6462 y : S4(read);
6463 q : S3(read);
6464 p : S3(read);
6465 DECODE : S0(4); // any decoder
6466 %}
6467
6468 // Conditional move reg-reg
6469 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6470 %{
6471 single_instruction;
6472 dst : S4(write);
6473 src : S3(read);
6474 cr : S3(read);
6475 DECODE : S0; // any decoder
6476 %}
6477
6478 // Conditional move reg-mem
6479 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6480 %{
6481 single_instruction;
6482 dst : S4(write);
6483 src : S3(read);
6484 cr : S3(read);
6485 DECODE : S0; // any decoder
6486 MEM : S3;
6487 %}
6488
6489 // Conditional move reg-reg long
6490 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6491 %{
6492 single_instruction;
6493 dst : S4(write);
6494 src : S3(read);
6495 cr : S3(read);
6496 DECODE : S0(2); // any 2 decoders
6497 %}
6498
6499 // Float reg-reg operation
6500 pipe_class fpu_reg(regD dst)
6501 %{
6502 instruction_count(2);
6503 dst : S3(read);
6504 DECODE : S0(2); // any 2 decoders
6505 FPU : S3;
6506 %}
6507
6508 // Float reg-reg operation
6509 pipe_class fpu_reg_reg(regD dst, regD src)
6510 %{
6511 instruction_count(2);
6512 dst : S4(write);
6513 src : S3(read);
6514 DECODE : S0(2); // any 2 decoders
6515 FPU : S3;
6516 %}
6517
6518 // Float reg-reg operation
6519 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6520 %{
6521 instruction_count(3);
6522 dst : S4(write);
6523 src1 : S3(read);
6524 src2 : S3(read);
6525 DECODE : S0(3); // any 3 decoders
6526 FPU : S3(2);
6527 %}
6528
6529 // Float reg-reg operation
6530 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6531 %{
6532 instruction_count(4);
6533 dst : S4(write);
6534 src1 : S3(read);
6535 src2 : S3(read);
6536 src3 : S3(read);
    DECODE : S0(4); // any 4 decoders
6538 FPU : S3(2);
6539 %}
6540
6541 // Float reg-reg operation
6542 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6543 %{
6544 instruction_count(4);
6545 dst : S4(write);
6546 src1 : S3(read);
6547 src2 : S3(read);
6548 src3 : S3(read);
6549 DECODE : S1(3); // any 3 decoders
6550 D0 : S0; // Big decoder only
6551 FPU : S3(2);
6552 MEM : S3;
6553 %}
6554
6555 // Float reg-mem operation
6556 pipe_class fpu_reg_mem(regD dst, memory mem)
6557 %{
6558 instruction_count(2);
6559 dst : S5(write);
6560 mem : S3(read);
6561 D0 : S0; // big decoder only
6562 DECODE : S1; // any decoder for FPU POP
6563 FPU : S4;
6564 MEM : S3; // any mem
6565 %}
6566
6567 // Float reg-mem operation
6568 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6569 %{
6570 instruction_count(3);
6571 dst : S5(write);
6572 src1 : S3(read);
6573 mem : S3(read);
6574 D0 : S0; // big decoder only
6575 DECODE : S1(2); // any decoder for FPU POP
6576 FPU : S4;
6577 MEM : S3; // any mem
6578 %}
6579
6580 // Float mem-reg operation
6581 pipe_class fpu_mem_reg(memory mem, regD src)
6582 %{
6583 instruction_count(2);
6584 src : S5(read);
6585 mem : S3(read);
6586 DECODE : S0; // any decoder for FPU PUSH
6587 D0 : S1; // big decoder only
6588 FPU : S4;
6589 MEM : S3; // any mem
6590 %}
6591
6592 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6593 %{
6594 instruction_count(3);
6595 src1 : S3(read);
6596 src2 : S3(read);
6597 mem : S3(read);
6598 DECODE : S0(2); // any decoder for FPU PUSH
6599 D0 : S1; // big decoder only
6600 FPU : S4;
6601 MEM : S3; // any mem
6602 %}
6603
6604 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6605 %{
6606 instruction_count(3);
6607 src1 : S3(read);
6608 src2 : S3(read);
6609 mem : S4(read);
6610 DECODE : S0; // any decoder for FPU PUSH
6611 D0 : S0(2); // big decoder only
6612 FPU : S4;
6613 MEM : S3(2); // any mem
6614 %}
6615
6616 pipe_class fpu_mem_mem(memory dst, memory src1)
6617 %{
6618 instruction_count(2);
6619 src1 : S3(read);
6620 dst : S4(read);
6621 D0 : S0(2); // big decoder only
6622 MEM : S3(2); // any mem
6623 %}
6624
6625 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6626 %{
6627 instruction_count(3);
6628 src1 : S3(read);
6629 src2 : S3(read);
6630 dst : S4(read);
6631 D0 : S0(3); // big decoder only
6632 FPU : S4;
6633 MEM : S3(3); // any mem
6634 %}
6635
6636 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6637 %{
6638 instruction_count(3);
6639 src1 : S4(read);
6640 mem : S4(read);
6641 DECODE : S0; // any decoder for FPU PUSH
6642 D0 : S0(2); // big decoder only
6643 FPU : S4;
6644 MEM : S3(2); // any mem
6645 %}
6646
6647 // Float load constant
6648 pipe_class fpu_reg_con(regD dst)
6649 %{
6650 instruction_count(2);
6651 dst : S5(write);
6652 D0 : S0; // big decoder only for the load
6653 DECODE : S1; // any decoder for FPU POP
6654 FPU : S4;
6655 MEM : S3; // any mem
6656 %}
6657
6658 // Float load constant
6659 pipe_class fpu_reg_reg_con(regD dst, regD src)
6660 %{
6661 instruction_count(3);
6662 dst : S5(write);
6663 src : S3(read);
6664 D0 : S0; // big decoder only for the load
6665 DECODE : S1(2); // any decoder for FPU POP
6666 FPU : S4;
6667 MEM : S3; // any mem
6668 %}
6669
// Unconditional branch
6671 pipe_class pipe_jmp(label labl)
6672 %{
6673 single_instruction;
6674 BR : S3;
6675 %}
6676
6677 // Conditional branch
6678 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6679 %{
6680 single_instruction;
6681 cr : S1(read);
6682 BR : S3;
6683 %}
6684
6685 // Allocation idiom
6686 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6687 %{
6688 instruction_count(1); force_serialization;
6689 fixed_latency(6);
6690 heap_ptr : S3(read);
6691 DECODE : S0(3);
6692 D0 : S2;
6693 MEM : S3;
6694 ALU : S3(2);
6695 dst : S5(write);
6696 BR : S5;
6697 %}
6698
6699 // Generic big/slow expanded idiom
6700 pipe_class pipe_slow()
6701 %{
6702 instruction_count(10); multiple_bundles; force_serialization;
6703 fixed_latency(100);
6704 D0 : S0(2);
6705 MEM : S3(2);
6706 %}
6707
6708 // The real do-nothing guy
6709 pipe_class empty()
6710 %{
6711 instruction_count(0);
6712 %}
6713
6714 // Define the class for the Nop node
6715 define
6716 %{
6717 MachNop = empty;
6718 %}
6719
6720 %}
6721
6722 //----------INSTRUCTIONS-------------------------------------------------------
6723 //
6724 // match -- States which machine-independent subtree may be replaced
6725 // by this instruction.
6726 // ins_cost -- The estimated cost of this instruction is used by instruction
6727 // selection to identify a minimum cost tree of machine
6728 // instructions that matches a tree of machine-independent
6729 // instructions.
6730 // format -- A string providing the disassembly for this instruction.
6731 // The value of an instruction's operand may be inserted
6732 // by referring to it with a '$' prefix.
6733 // opcode -- Three instruction opcodes may be provided. These are referred
6734 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6736 // indicate the type of machine instruction, while secondary
6737 // and tertiary are often used for prefix options or addressing
6738 // modes.
6739 // ins_encode -- A list of encode classes with parameters. The encode class
6740 // name must have been defined in an 'enc_class' specification
6741 // in the encode section of the architecture description.
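//
// For illustration only -- a minimal sketch, not one of the rules below --
// here is how these attributes combine in a typical instruct entry (the
// rule name and cost are invented for the example):
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src)); // ideal subtree this rule replaces
//     effect(KILL cr);               // addl clobbers the condition codes
//     ins_cost(150);                 // relative cost seen by the matcher
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}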
6742
6743 // ============================================================================
6744
6745 instruct ShouldNotReachHere() %{
6746 match(Halt);
6747 format %{ "stop\t# ShouldNotReachHere" %}
6748 ins_encode %{
6749 if (is_reachable()) {
6750 const char* str = __ code_string(_halt_reason);
6751 __ stop(str);
6752 }
6753 %}
6754 ins_pipe(pipe_slow);
6755 %}
6756
6757 // ============================================================================
6758
6759 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6760 // Load Float
6761 instruct MoveF2VL(vlRegF dst, regF src) %{
6762 match(Set dst src);
6763 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6764 ins_encode %{
6765 ShouldNotReachHere();
6766 %}
6767 ins_pipe( fpu_reg_reg );
6768 %}
6769
6770 // Load Float
6771 instruct MoveF2LEG(legRegF dst, regF src) %{
6772 match(Set dst src);
6773 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6774 ins_encode %{
6775 ShouldNotReachHere();
6776 %}
6777 ins_pipe( fpu_reg_reg );
6778 %}
6779
6780 // Load Float
6781 instruct MoveVL2F(regF dst, vlRegF src) %{
6782 match(Set dst src);
6783 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6784 ins_encode %{
6785 ShouldNotReachHere();
6786 %}
6787 ins_pipe( fpu_reg_reg );
6788 %}
6789
6790 // Load Float
6791 instruct MoveLEG2F(regF dst, legRegF src) %{
6792 match(Set dst src);
6793 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6794 ins_encode %{
6795 ShouldNotReachHere();
6796 %}
6797 ins_pipe( fpu_reg_reg );
6798 %}
6799
6800 // Load Double
6801 instruct MoveD2VL(vlRegD dst, regD src) %{
6802 match(Set dst src);
6803 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6804 ins_encode %{
6805 ShouldNotReachHere();
6806 %}
6807 ins_pipe( fpu_reg_reg );
6808 %}
6809
6810 // Load Double
6811 instruct MoveD2LEG(legRegD dst, regD src) %{
6812 match(Set dst src);
6813 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6814 ins_encode %{
6815 ShouldNotReachHere();
6816 %}
6817 ins_pipe( fpu_reg_reg );
6818 %}
6819
6820 // Load Double
6821 instruct MoveVL2D(regD dst, vlRegD src) %{
6822 match(Set dst src);
6823 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6824 ins_encode %{
6825 ShouldNotReachHere();
6826 %}
6827 ins_pipe( fpu_reg_reg );
6828 %}
6829
6830 // Load Double
6831 instruct MoveLEG2D(regD dst, legRegD src) %{
6832 match(Set dst src);
6833 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6834 ins_encode %{
6835 ShouldNotReachHere();
6836 %}
6837 ins_pipe( fpu_reg_reg );
6838 %}
6839
6840 //----------Load/Store/Move Instructions---------------------------------------
6841 //----------Load Instructions--------------------------------------------------
6842
6843 // Load Byte (8 bit signed)
6844 instruct loadB(rRegI dst, memory mem)
6845 %{
6846 match(Set dst (LoadB mem));
6847
6848 ins_cost(125);
6849 format %{ "movsbl $dst, $mem\t# byte" %}
6850
6851 ins_encode %{
6852 __ movsbl($dst$$Register, $mem$$Address);
6853 %}
6854
6855 ins_pipe(ialu_reg_mem);
6856 %}
6857
6858 // Load Byte (8 bit signed) into Long Register
6859 instruct loadB2L(rRegL dst, memory mem)
6860 %{
6861 match(Set dst (ConvI2L (LoadB mem)));
6862
6863 ins_cost(125);
6864 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6865
6866 ins_encode %{
6867 __ movsbq($dst$$Register, $mem$$Address);
6868 %}
6869
6870 ins_pipe(ialu_reg_mem);
6871 %}
6872
6873 // Load Unsigned Byte (8 bit UNsigned)
6874 instruct loadUB(rRegI dst, memory mem)
6875 %{
6876 match(Set dst (LoadUB mem));
6877
6878 ins_cost(125);
6879 format %{ "movzbl $dst, $mem\t# ubyte" %}
6880
6881 ins_encode %{
6882 __ movzbl($dst$$Register, $mem$$Address);
6883 %}
6884
6885 ins_pipe(ialu_reg_mem);
6886 %}
6887
6888 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6889 instruct loadUB2L(rRegL dst, memory mem)
6890 %{
6891 match(Set dst (ConvI2L (LoadUB mem)));
6892
6893 ins_cost(125);
6894 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6895
6896 ins_encode %{
6897 __ movzbq($dst$$Register, $mem$$Address);
6898 %}
6899
6900 ins_pipe(ialu_reg_mem);
6901 %}
6902
6903 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6904 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6905 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6906 effect(KILL cr);
6907
6908 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6909 "andl $dst, right_n_bits($mask, 8)" %}
6910 ins_encode %{
6911 Register Rdst = $dst$$Register;
6912 __ movzbq(Rdst, $mem$$Address);
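// The movzbq above zero-extends, so only the low 8 bits of the 32-bit
// mask can affect the result; truncate the immediate accordingly.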
6913 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6914 %}
6915 ins_pipe(ialu_reg_mem);
6916 %}
6917
6918 // Load Short (16 bit signed)
6919 instruct loadS(rRegI dst, memory mem)
6920 %{
6921 match(Set dst (LoadS mem));
6922
6923 ins_cost(125);
6924 format %{ "movswl $dst, $mem\t# short" %}
6925
6926 ins_encode %{
6927 __ movswl($dst$$Register, $mem$$Address);
6928 %}
6929
6930 ins_pipe(ialu_reg_mem);
6931 %}
6932
6933 // Load Short (16 bit signed) to Byte (8 bit signed)
6934 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6935 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6936
6937 ins_cost(125);
6938 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6939 ins_encode %{
6940 __ movsbl($dst$$Register, $mem$$Address);
6941 %}
6942 ins_pipe(ialu_reg_mem);
6943 %}
6944
6945 // Load Short (16 bit signed) into Long Register
6946 instruct loadS2L(rRegL dst, memory mem)
6947 %{
6948 match(Set dst (ConvI2L (LoadS mem)));
6949
6950 ins_cost(125);
6951 format %{ "movswq $dst, $mem\t# short -> long" %}
6952
6953 ins_encode %{
6954 __ movswq($dst$$Register, $mem$$Address);
6955 %}
6956
6957 ins_pipe(ialu_reg_mem);
6958 %}
6959
6960 // Load Unsigned Short/Char (16 bit UNsigned)
6961 instruct loadUS(rRegI dst, memory mem)
6962 %{
6963 match(Set dst (LoadUS mem));
6964
6965 ins_cost(125);
6966 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6967
6968 ins_encode %{
6969 __ movzwl($dst$$Register, $mem$$Address);
6970 %}
6971
6972 ins_pipe(ialu_reg_mem);
6973 %}
6974
6975 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6976 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6977 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6978
6979 ins_cost(125);
6980 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6981 ins_encode %{
6982 __ movsbl($dst$$Register, $mem$$Address);
6983 %}
6984 ins_pipe(ialu_reg_mem);
6985 %}
6986
6987 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6988 instruct loadUS2L(rRegL dst, memory mem)
6989 %{
6990 match(Set dst (ConvI2L (LoadUS mem)));
6991
6992 ins_cost(125);
6993 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6994
6995 ins_encode %{
6996 __ movzwq($dst$$Register, $mem$$Address);
6997 %}
6998
6999 ins_pipe(ialu_reg_mem);
7000 %}
7001
7002 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7003 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7004 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7005
7006 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7007 ins_encode %{
7008 __ movzbq($dst$$Register, $mem$$Address);
7009 %}
7010 ins_pipe(ialu_reg_mem);
7011 %}
7012
7013 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7014 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7015 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7016 effect(KILL cr);
7017
7018 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7019 "andl $dst, right_n_bits($mask, 16)" %}
7020 ins_encode %{
7021 Register Rdst = $dst$$Register;
7022 __ movzwq(Rdst, $mem$$Address);
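// The movzwq above zero-extends, so only the low 16 bits of the 32-bit
// mask can affect the result; truncate the immediate accordingly.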
7023 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7024 %}
7025 ins_pipe(ialu_reg_mem);
7026 %}
7027
7028 // Load Integer
7029 instruct loadI(rRegI dst, memory mem)
7030 %{
7031 match(Set dst (LoadI mem));
7032
7033 ins_cost(125);
7034 format %{ "movl $dst, $mem\t# int" %}
7035
7036 ins_encode %{
7037 __ movl($dst$$Register, $mem$$Address);
7038 %}
7039
7040 ins_pipe(ialu_reg_mem);
7041 %}
7042
7043 // Load Integer (32 bit signed) to Byte (8 bit signed)
7044 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7045 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7046
7047 ins_cost(125);
7048 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7049 ins_encode %{
7050 __ movsbl($dst$$Register, $mem$$Address);
7051 %}
7052 ins_pipe(ialu_reg_mem);
7053 %}
7054
7055 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7056 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7057 match(Set dst (AndI (LoadI mem) mask));
7058
7059 ins_cost(125);
7060 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7061 ins_encode %{
7062 __ movzbl($dst$$Register, $mem$$Address);
7063 %}
7064 ins_pipe(ialu_reg_mem);
7065 %}
7066
7067 // Load Integer (32 bit signed) to Short (16 bit signed)
7068 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7069 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7070
7071 ins_cost(125);
7072 format %{ "movswl $dst, $mem\t# int -> short" %}
7073 ins_encode %{
7074 __ movswl($dst$$Register, $mem$$Address);
7075 %}
7076 ins_pipe(ialu_reg_mem);
7077 %}
7078
7079 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7080 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7081 match(Set dst (AndI (LoadI mem) mask));
7082
7083 ins_cost(125);
7084 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7085 ins_encode %{
7086 __ movzwl($dst$$Register, $mem$$Address);
7087 %}
7088 ins_pipe(ialu_reg_mem);
7089 %}
7090
7091 // Load Integer into Long Register
7092 instruct loadI2L(rRegL dst, memory mem)
7093 %{
7094 match(Set dst (ConvI2L (LoadI mem)));
7095
7096 ins_cost(125);
7097 format %{ "movslq $dst, $mem\t# int -> long" %}
7098
7099 ins_encode %{
7100 __ movslq($dst$$Register, $mem$$Address);
7101 %}
7102
7103 ins_pipe(ialu_reg_mem);
7104 %}
7105
7106 // Load Integer with mask 0xFF into Long Register
7107 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7108 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7109
7110 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7111 ins_encode %{
7112 __ movzbq($dst$$Register, $mem$$Address);
7113 %}
7114 ins_pipe(ialu_reg_mem);
7115 %}
7116
7117 // Load Integer with mask 0xFFFF into Long Register
7118 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7119 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7120
7121 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7122 ins_encode %{
7123 __ movzwq($dst$$Register, $mem$$Address);
7124 %}
7125 ins_pipe(ialu_reg_mem);
7126 %}
7127
7128 // Load Integer with a 31-bit mask into Long Register
7129 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7130 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7131 effect(KILL cr);
7132
7133 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7134 "andl $dst, $mask" %}
7135 ins_encode %{
7136 Register Rdst = $dst$$Register;
7137 __ movl(Rdst, $mem$$Address);
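// movl zero-extends into the upper 32 bits; because the immU31 mask
// clears bit 31, the masked value is non-negative and zero-extension
// matches the sign-extension that ConvI2L requires.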
7138 __ andl(Rdst, $mask$$constant);
7139 %}
7140 ins_pipe(ialu_reg_mem);
7141 %}
7142
7143 // Load Unsigned Integer into Long Register
7144 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7145 %{
7146 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7147
7148 ins_cost(125);
7149 format %{ "movl $dst, $mem\t# uint -> long" %}
7150
7151 ins_encode %{
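// movl implicitly zeroes the upper 32 bits of $dst, which is exactly the
// AndL with the 0xFFFFFFFF mask; no separate and is needed.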
7152 __ movl($dst$$Register, $mem$$Address);
7153 %}
7154
7155 ins_pipe(ialu_reg_mem);
7156 %}
7157
7158 // Load Long
7159 instruct loadL(rRegL dst, memory mem)
7160 %{
7161 match(Set dst (LoadL mem));
7162
7163 ins_cost(125);
7164 format %{ "movq $dst, $mem\t# long" %}
7165
7166 ins_encode %{
7167 __ movq($dst$$Register, $mem$$Address);
7168 %}
7169
7170 ins_pipe(ialu_reg_mem); // XXX
7171 %}
7172
7173 // Load Range
7174 instruct loadRange(rRegI dst, memory mem)
7175 %{
7176 match(Set dst (LoadRange mem));
7177
7178 ins_cost(125); // XXX
7179 format %{ "movl $dst, $mem\t# range" %}
7180 ins_encode %{
7181 __ movl($dst$$Register, $mem$$Address);
7182 %}
7183 ins_pipe(ialu_reg_mem);
7184 %}
7185
7186 // Load Pointer
7187 instruct loadP(rRegP dst, memory mem)
7188 %{
7189 match(Set dst (LoadP mem));
7190 predicate(n->as_Load()->barrier_data() == 0);
7191
7192 ins_cost(125); // XXX
7193 format %{ "movq $dst, $mem\t# ptr" %}
7194 ins_encode %{
7195 __ movq($dst$$Register, $mem$$Address);
7196 %}
7197 ins_pipe(ialu_reg_mem); // XXX
7198 %}
7199
7200 // Load Compressed Pointer
7201 instruct loadN(rRegN dst, memory mem)
7202 %{
7203 predicate(n->as_Load()->barrier_data() == 0);
7204 match(Set dst (LoadN mem));
7205
7206 ins_cost(125); // XXX
7207 format %{ "movl $dst, $mem\t# compressed ptr" %}
7208 ins_encode %{
7209 __ movl($dst$$Register, $mem$$Address);
7210 %}
7211 ins_pipe(ialu_reg_mem); // XXX
7212 %}
7213
7214
7215 // Load Klass Pointer
7216 instruct loadKlass(rRegP dst, memory mem)
7217 %{
7218 match(Set dst (LoadKlass mem));
7219
7220 ins_cost(125); // XXX
7221 format %{ "movq $dst, $mem\t# class" %}
7222 ins_encode %{
7223 __ movq($dst$$Register, $mem$$Address);
7224 %}
7225 ins_pipe(ialu_reg_mem); // XXX
7226 %}
7227
7228 // Load narrow Klass Pointer
7229 instruct loadNKlass(rRegN dst, memory mem)
7230 %{
7231 predicate(!UseCompactObjectHeaders);
7232 match(Set dst (LoadNKlass mem));
7233
7234 ins_cost(125); // XXX
7235 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7236 ins_encode %{
7237 __ movl($dst$$Register, $mem$$Address);
7238 %}
7239 ins_pipe(ialu_reg_mem); // XXX
7240 %}
7241
7242 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7243 %{
7244 predicate(UseCompactObjectHeaders);
7245 match(Set dst (LoadNKlass mem));
7246 effect(KILL cr);
7247 ins_cost(125);
7248 format %{
7249 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7250 "shrl $dst, markWord::klass_shift_at_offset"
7251 %}
7252 ins_encode %{
7253 if (UseAPX) {
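// APX new-data-destination form: the shift reads $mem and writes $dst in
// a single instruction, folding the movl+shrl pair below (the trailing
// bool is assumed here to be the APX no-flags control).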
7254 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
} else {
7257 __ movl($dst$$Register, $mem$$Address);
7258 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7259 }
7260 %}
7261 ins_pipe(ialu_reg_mem);
7262 %}
7263
7264 // Load Float
7265 instruct loadF(regF dst, memory mem)
7266 %{
7267 match(Set dst (LoadF mem));
7268
7269 ins_cost(145); // XXX
7270 format %{ "movss $dst, $mem\t# float" %}
7271 ins_encode %{
7272 __ movflt($dst$$XMMRegister, $mem$$Address);
7273 %}
7274 ins_pipe(pipe_slow); // XXX
7275 %}
7276
7277 // Load Double
7278 instruct loadD_partial(regD dst, memory mem)
7279 %{
7280 predicate(!UseXmmLoadAndClearUpper);
7281 match(Set dst (LoadD mem));
7282
7283 ins_cost(145); // XXX
7284 format %{ "movlpd $dst, $mem\t# double" %}
7285 ins_encode %{
7286 __ movdbl($dst$$XMMRegister, $mem$$Address);
7287 %}
7288 ins_pipe(pipe_slow); // XXX
7289 %}
7290
7291 instruct loadD(regD dst, memory mem)
7292 %{
7293 predicate(UseXmmLoadAndClearUpper);
7294 match(Set dst (LoadD mem));
7295
7296 ins_cost(145); // XXX
7297 format %{ "movsd $dst, $mem\t# double" %}
7298 ins_encode %{
7299 __ movdbl($dst$$XMMRegister, $mem$$Address);
7300 %}
7301 ins_pipe(pipe_slow); // XXX
7302 %}
7303
7304 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7305 %{
7306 match(Set dst con);
7307
7308 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7309
7310 ins_encode %{
7311 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7312 %}
7313
7314 ins_pipe(ialu_reg_fat);
7315 %}
7316
7317 // max = java.lang.Math.max(float a, float b)
7318 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7319 predicate(VM_Version::supports_avx10_2());
7320 match(Set dst (MaxF a b));
7321 format %{ "maxF $dst, $a, $b" %}
7322 ins_encode %{
7323 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7324 %}
7325 ins_pipe( pipe_slow );
7326 %}
7327
7328 // max = java.lang.Math.max(float a, float b)
7329 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7330 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7331 match(Set dst (MaxF a b));
7332 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7333 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7334 ins_encode %{
7335 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7336 %}
7337 ins_pipe( pipe_slow );
7338 %}
7339
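// Reduction variant: in a reduction $dst may alias one of the inputs, so the
// blend-based vminmax_fp used above is avoided in favor of a scalar sequence
// that tolerates the aliasing (an inference from the differing TEMP/KILL
// sets, not a documented contract). The same split applies to the other
// reduction rules below.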
7340 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7341 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7342 match(Set dst (MaxF a b));
7343 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7344
format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7346 ins_encode %{
7347 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7348 false /*min*/, true /*single*/);
7349 %}
7350 ins_pipe( pipe_slow );
7351 %}
7352
7353 // max = java.lang.Math.max(double a, double b)
7354 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7355 predicate(VM_Version::supports_avx10_2());
7356 match(Set dst (MaxD a b));
7357 format %{ "maxD $dst, $a, $b" %}
7358 ins_encode %{
7359 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 // max = java.lang.Math.max(double a, double b)
7365 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7366 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7367 match(Set dst (MaxD a b));
7368 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7369 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7370 ins_encode %{
7371 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7372 %}
7373 ins_pipe( pipe_slow );
7374 %}
7375
7376 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7377 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7378 match(Set dst (MaxD a b));
7379 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7380
7381 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7382 ins_encode %{
7383 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7384 false /*min*/, false /*single*/);
7385 %}
7386 ins_pipe( pipe_slow );
7387 %}
7388
// min = java.lang.Math.min(float a, float b)
7390 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7391 predicate(VM_Version::supports_avx10_2());
7392 match(Set dst (MinF a b));
7393 format %{ "minF $dst, $a, $b" %}
7394 ins_encode %{
7395 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7396 %}
7397 ins_pipe( pipe_slow );
7398 %}
7399
7400 // min = java.lang.Math.min(float a, float b)
7401 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7402 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7403 match(Set dst (MinF a b));
7404 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7405 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7406 ins_encode %{
7407 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7408 %}
7409 ins_pipe( pipe_slow );
7410 %}
7411
7412 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7413 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7414 match(Set dst (MinF a b));
7415 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7416
7417 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7418 ins_encode %{
7419 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7420 true /*min*/, true /*single*/);
7421 %}
7422 ins_pipe( pipe_slow );
7423 %}
7424
// min = java.lang.Math.min(double a, double b)
7426 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7427 predicate(VM_Version::supports_avx10_2());
7428 match(Set dst (MinD a b));
7429 format %{ "minD $dst, $a, $b" %}
7430 ins_encode %{
7431 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7432 %}
7433 ins_pipe( pipe_slow );
7434 %}
7435
7436 // min = java.lang.Math.min(double a, double b)
7437 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7438 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7439 match(Set dst (MinD a b));
7440 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7441 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7442 ins_encode %{
7443 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7444 %}
7445 ins_pipe( pipe_slow );
7446 %}
7447
7448 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7449 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7450 match(Set dst (MinD a b));
7451 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7452
format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7454 ins_encode %{
7455 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7456 true /*min*/, false /*single*/);
7457 %}
7458 ins_pipe( pipe_slow );
7459 %}
7460
7461 // Load Effective Address
7462 instruct leaP8(rRegP dst, indOffset8 mem)
7463 %{
7464 match(Set dst mem);
7465
7466 ins_cost(110); // XXX
7467 format %{ "leaq $dst, $mem\t# ptr 8" %}
7468 ins_encode %{
7469 __ leaq($dst$$Register, $mem$$Address);
7470 %}
7471 ins_pipe(ialu_reg_reg_fat);
7472 %}
7473
7474 instruct leaP32(rRegP dst, indOffset32 mem)
7475 %{
7476 match(Set dst mem);
7477
7478 ins_cost(110);
7479 format %{ "leaq $dst, $mem\t# ptr 32" %}
7480 ins_encode %{
7481 __ leaq($dst$$Register, $mem$$Address);
7482 %}
7483 ins_pipe(ialu_reg_reg_fat);
7484 %}
7485
7486 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7487 %{
7488 match(Set dst mem);
7489
7490 ins_cost(110);
7491 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7492 ins_encode %{
7493 __ leaq($dst$$Register, $mem$$Address);
7494 %}
7495 ins_pipe(ialu_reg_reg_fat);
7496 %}
7497
7498 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7499 %{
7500 match(Set dst mem);
7501
7502 ins_cost(110);
7503 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7504 ins_encode %{
7505 __ leaq($dst$$Register, $mem$$Address);
7506 %}
7507 ins_pipe(ialu_reg_reg_fat);
7508 %}
7509
7510 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7511 %{
7512 match(Set dst mem);
7513
7514 ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
7516 ins_encode %{
7517 __ leaq($dst$$Register, $mem$$Address);
7518 %}
7519 ins_pipe(ialu_reg_reg_fat);
7520 %}
7521
7522 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7523 %{
7524 match(Set dst mem);
7525
7526 ins_cost(110);
7527 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7528 ins_encode %{
7529 __ leaq($dst$$Register, $mem$$Address);
7530 %}
7531 ins_pipe(ialu_reg_reg_fat);
7532 %}
7533
7534 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7535 %{
7536 match(Set dst mem);
7537
7538 ins_cost(110);
7539 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7540 ins_encode %{
7541 __ leaq($dst$$Register, $mem$$Address);
7542 %}
7543 ins_pipe(ialu_reg_reg_fat);
7544 %}
7545
7546 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7547 %{
7548 match(Set dst mem);
7549
7550 ins_cost(110);
7551 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7552 ins_encode %{
7553 __ leaq($dst$$Register, $mem$$Address);
7554 %}
7555 ins_pipe(ialu_reg_reg_fat);
7556 %}
7557
7558 // Load Effective Address which uses Narrow (32-bits) oop
7559 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7560 %{
7561 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7562 match(Set dst mem);
7563
7564 ins_cost(110);
7565 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7566 ins_encode %{
7567 __ leaq($dst$$Register, $mem$$Address);
7568 %}
7569 ins_pipe(ialu_reg_reg_fat);
7570 %}
7571
7572 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7573 %{
7574 predicate(CompressedOops::shift() == 0);
7575 match(Set dst mem);
7576
7577 ins_cost(110); // XXX
7578 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7579 ins_encode %{
7580 __ leaq($dst$$Register, $mem$$Address);
7581 %}
7582 ins_pipe(ialu_reg_reg_fat);
7583 %}
7584
7585 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7586 %{
7587 predicate(CompressedOops::shift() == 0);
7588 match(Set dst mem);
7589
7590 ins_cost(110);
7591 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7592 ins_encode %{
7593 __ leaq($dst$$Register, $mem$$Address);
7594 %}
7595 ins_pipe(ialu_reg_reg_fat);
7596 %}
7597
7598 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7599 %{
7600 predicate(CompressedOops::shift() == 0);
7601 match(Set dst mem);
7602
7603 ins_cost(110);
7604 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7605 ins_encode %{
7606 __ leaq($dst$$Register, $mem$$Address);
7607 %}
7608 ins_pipe(ialu_reg_reg_fat);
7609 %}
7610
7611 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7612 %{
7613 predicate(CompressedOops::shift() == 0);
7614 match(Set dst mem);
7615
7616 ins_cost(110);
7617 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7618 ins_encode %{
7619 __ leaq($dst$$Register, $mem$$Address);
7620 %}
7621 ins_pipe(ialu_reg_reg_fat);
7622 %}
7623
7624 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7625 %{
7626 predicate(CompressedOops::shift() == 0);
7627 match(Set dst mem);
7628
7629 ins_cost(110);
7630 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7631 ins_encode %{
7632 __ leaq($dst$$Register, $mem$$Address);
7633 %}
7634 ins_pipe(ialu_reg_reg_fat);
7635 %}
7636
7637 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7638 %{
7639 predicate(CompressedOops::shift() == 0);
7640 match(Set dst mem);
7641
7642 ins_cost(110);
7643 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7644 ins_encode %{
7645 __ leaq($dst$$Register, $mem$$Address);
7646 %}
7647 ins_pipe(ialu_reg_reg_fat);
7648 %}
7649
7650 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7651 %{
7652 predicate(CompressedOops::shift() == 0);
7653 match(Set dst mem);
7654
7655 ins_cost(110);
7656 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7657 ins_encode %{
7658 __ leaq($dst$$Register, $mem$$Address);
7659 %}
7660 ins_pipe(ialu_reg_reg_fat);
7661 %}
7662
7663 instruct loadConI(rRegI dst, immI src)
7664 %{
7665 match(Set dst src);
7666
7667 format %{ "movl $dst, $src\t# int" %}
7668 ins_encode %{
7669 __ movl($dst$$Register, $src$$constant);
7670 %}
7671 ins_pipe(ialu_reg_fat); // XXX
7672 %}
7673
7674 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7675 %{
7676 match(Set dst src);
7677 effect(KILL cr);
7678
7679 ins_cost(50);
7680 format %{ "xorl $dst, $dst\t# int" %}
7681 ins_encode %{
7682 __ xorl($dst$$Register, $dst$$Register);
7683 %}
7684 ins_pipe(ialu_reg);
7685 %}
7686
7687 instruct loadConL(rRegL dst, immL src)
7688 %{
7689 match(Set dst src);
7690
7691 ins_cost(150);
7692 format %{ "movq $dst, $src\t# long" %}
7693 ins_encode %{
7694 __ mov64($dst$$Register, $src$$constant);
7695 %}
7696 ins_pipe(ialu_reg);
7697 %}
7698
7699 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7700 %{
7701 match(Set dst src);
7702 effect(KILL cr);
7703
7704 ins_cost(50);
7705 format %{ "xorl $dst, $dst\t# long" %}
7706 ins_encode %{
7707 __ xorl($dst$$Register, $dst$$Register);
7708 %}
7709 ins_pipe(ialu_reg); // XXX
7710 %}
7711
7712 instruct loadConUL32(rRegL dst, immUL32 src)
7713 %{
7714 match(Set dst src);
7715
7716 ins_cost(60);
7717 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7718 ins_encode %{
7719 __ movl($dst$$Register, $src$$constant);
7720 %}
7721 ins_pipe(ialu_reg);
7722 %}
7723
7724 instruct loadConL32(rRegL dst, immL32 src)
7725 %{
7726 match(Set dst src);
7727
7728 ins_cost(70);
7729 format %{ "movq $dst, $src\t# long (32-bit)" %}
7730 ins_encode %{
7731 __ movq($dst$$Register, $src$$constant);
7732 %}
7733 ins_pipe(ialu_reg);
7734 %}
7735
7736 instruct loadConP(rRegP dst, immP con) %{
7737 match(Set dst con);
7738
7739 format %{ "movq $dst, $con\t# ptr" %}
7740 ins_encode %{
7741 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7742 %}
7743 ins_pipe(ialu_reg_fat); // XXX
7744 %}
7745
7746 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7747 %{
7748 match(Set dst src);
7749 effect(KILL cr);
7750
7751 ins_cost(50);
7752 format %{ "xorl $dst, $dst\t# ptr" %}
7753 ins_encode %{
7754 __ xorl($dst$$Register, $dst$$Register);
7755 %}
7756 ins_pipe(ialu_reg);
7757 %}
7758
7759 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7760 %{
7761 match(Set dst src);
7762 effect(KILL cr);
7763
7764 ins_cost(60);
7765 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7766 ins_encode %{
7767 __ movl($dst$$Register, $src$$constant);
7768 %}
7769 ins_pipe(ialu_reg);
7770 %}
7771
7772 instruct loadConF(regF dst, immF con) %{
7773 match(Set dst con);
7774 ins_cost(125);
7775 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7776 ins_encode %{
7777 __ movflt($dst$$XMMRegister, $constantaddress($con));
7778 %}
7779 ins_pipe(pipe_slow);
7780 %}
7781
7782 instruct loadConH(regF dst, immH con) %{
7783 match(Set dst con);
7784 ins_cost(125);
7785 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7786 ins_encode %{
7787 __ movflt($dst$$XMMRegister, $constantaddress($con));
7788 %}
7789 ins_pipe(pipe_slow);
7790 %}
7791
7792 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7793 match(Set dst src);
7794 effect(KILL cr);
format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7796 ins_encode %{
7797 __ xorq($dst$$Register, $dst$$Register);
7798 %}
7799 ins_pipe(ialu_reg);
7800 %}
7801
7802 instruct loadConN(rRegN dst, immN src) %{
7803 match(Set dst src);
7804
7805 ins_cost(125);
7806 format %{ "movl $dst, $src\t# compressed ptr" %}
7807 ins_encode %{
7808 address con = (address)$src$$constant;
7809 if (con == nullptr) {
7810 ShouldNotReachHere();
7811 } else {
7812 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7813 }
7814 %}
7815 ins_pipe(ialu_reg_fat); // XXX
7816 %}
7817
7818 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7819 match(Set dst src);
7820
7821 ins_cost(125);
7822 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7823 ins_encode %{
7824 address con = (address)$src$$constant;
7825 if (con == nullptr) {
7826 ShouldNotReachHere();
7827 } else {
7828 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7829 }
7830 %}
7831 ins_pipe(ialu_reg_fat); // XXX
7832 %}
7833
7834 instruct loadConF0(regF dst, immF0 src)
7835 %{
7836 match(Set dst src);
7837 ins_cost(100);
7838
7839 format %{ "xorps $dst, $dst\t# float 0.0" %}
7840 ins_encode %{
7841 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7842 %}
7843 ins_pipe(pipe_slow);
7844 %}
7845
// Use the same format since predicate() cannot be used here.
7847 instruct loadConD(regD dst, immD con) %{
7848 match(Set dst con);
7849 ins_cost(125);
7850 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7851 ins_encode %{
7852 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7853 %}
7854 ins_pipe(pipe_slow);
7855 %}
7856
7857 instruct loadConD0(regD dst, immD0 src)
7858 %{
7859 match(Set dst src);
7860 ins_cost(100);
7861
7862 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7863 ins_encode %{
7864 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7865 %}
7866 ins_pipe(pipe_slow);
7867 %}
7868
7869 instruct loadSSI(rRegI dst, stackSlotI src)
7870 %{
7871 match(Set dst src);
7872
7873 ins_cost(125);
7874 format %{ "movl $dst, $src\t# int stk" %}
7875 ins_encode %{
7876 __ movl($dst$$Register, $src$$Address);
7877 %}
7878 ins_pipe(ialu_reg_mem);
7879 %}
7880
7881 instruct loadSSL(rRegL dst, stackSlotL src)
7882 %{
7883 match(Set dst src);
7884
7885 ins_cost(125);
7886 format %{ "movq $dst, $src\t# long stk" %}
7887 ins_encode %{
7888 __ movq($dst$$Register, $src$$Address);
7889 %}
7890 ins_pipe(ialu_reg_mem);
7891 %}
7892
7893 instruct loadSSP(rRegP dst, stackSlotP src)
7894 %{
7895 match(Set dst src);
7896
7897 ins_cost(125);
7898 format %{ "movq $dst, $src\t# ptr stk" %}
7899 ins_encode %{
7900 __ movq($dst$$Register, $src$$Address);
7901 %}
7902 ins_pipe(ialu_reg_mem);
7903 %}
7904
7905 instruct loadSSF(regF dst, stackSlotF src)
7906 %{
7907 match(Set dst src);
7908
7909 ins_cost(125);
7910 format %{ "movss $dst, $src\t# float stk" %}
7911 ins_encode %{
7912 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7913 %}
7914 ins_pipe(pipe_slow); // XXX
7915 %}
7916
// Use the same format since predicate() cannot be used here.
7918 instruct loadSSD(regD dst, stackSlotD src)
7919 %{
7920 match(Set dst src);
7921
7922 ins_cost(125);
7923 format %{ "movsd $dst, $src\t# double stk" %}
7924 ins_encode %{
7925 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7926 %}
7927 ins_pipe(pipe_slow); // XXX
7928 %}
7929
7930 // Prefetch instructions for allocation.
7931 // Must be safe to execute with invalid address (cannot fault).
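// (x86 PREFETCHh opcodes are architecturally hints and never raise faults,
// so each variant below satisfies this requirement; AllocatePrefetchInstr
// selects among them.)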
7932
7933 instruct prefetchAlloc( memory mem ) %{
7934 predicate(AllocatePrefetchInstr==3);
7935 match(PrefetchAllocation mem);
7936 ins_cost(125);
7937
7938 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7939 ins_encode %{
7940 __ prefetchw($mem$$Address);
7941 %}
7942 ins_pipe(ialu_mem);
7943 %}
7944
7945 instruct prefetchAllocNTA( memory mem ) %{
7946 predicate(AllocatePrefetchInstr==0);
7947 match(PrefetchAllocation mem);
7948 ins_cost(125);
7949
7950 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7951 ins_encode %{
7952 __ prefetchnta($mem$$Address);
7953 %}
7954 ins_pipe(ialu_mem);
7955 %}
7956
7957 instruct prefetchAllocT0( memory mem ) %{
7958 predicate(AllocatePrefetchInstr==1);
7959 match(PrefetchAllocation mem);
7960 ins_cost(125);
7961
7962 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7963 ins_encode %{
7964 __ prefetcht0($mem$$Address);
7965 %}
7966 ins_pipe(ialu_mem);
7967 %}
7968
7969 instruct prefetchAllocT2( memory mem ) %{
7970 predicate(AllocatePrefetchInstr==2);
7971 match(PrefetchAllocation mem);
7972 ins_cost(125);
7973
7974 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7975 ins_encode %{
7976 __ prefetcht2($mem$$Address);
7977 %}
7978 ins_pipe(ialu_mem);
7979 %}
7980
7981 //----------Store Instructions-------------------------------------------------
7982
7983 // Store Byte
7984 instruct storeB(memory mem, rRegI src)
7985 %{
7986 match(Set mem (StoreB mem src));
7987
7988 ins_cost(125); // XXX
7989 format %{ "movb $mem, $src\t# byte" %}
7990 ins_encode %{
7991 __ movb($mem$$Address, $src$$Register);
7992 %}
7993 ins_pipe(ialu_mem_reg);
7994 %}
7995
7996 // Store Char/Short
7997 instruct storeC(memory mem, rRegI src)
7998 %{
7999 match(Set mem (StoreC mem src));
8000
8001 ins_cost(125); // XXX
8002 format %{ "movw $mem, $src\t# char/short" %}
8003 ins_encode %{
8004 __ movw($mem$$Address, $src$$Register);
8005 %}
8006 ins_pipe(ialu_mem_reg);
8007 %}
8008
8009 // Store Integer
8010 instruct storeI(memory mem, rRegI src)
8011 %{
8012 match(Set mem (StoreI mem src));
8013
8014 ins_cost(125); // XXX
8015 format %{ "movl $mem, $src\t# int" %}
8016 ins_encode %{
8017 __ movl($mem$$Address, $src$$Register);
8018 %}
8019 ins_pipe(ialu_mem_reg);
8020 %}
8021
8022 // Store Long
8023 instruct storeL(memory mem, rRegL src)
8024 %{
8025 match(Set mem (StoreL mem src));
8026
8027 ins_cost(125); // XXX
8028 format %{ "movq $mem, $src\t# long" %}
8029 ins_encode %{
8030 __ movq($mem$$Address, $src$$Register);
8031 %}
8032 ins_pipe(ialu_mem_reg); // XXX
8033 %}
8034
8035 // Store Pointer
8036 instruct storeP(memory mem, any_RegP src)
8037 %{
8038 predicate(n->as_Store()->barrier_data() == 0);
8039 match(Set mem (StoreP mem src));
8040
8041 ins_cost(125); // XXX
8042 format %{ "movq $mem, $src\t# ptr" %}
8043 ins_encode %{
8044 __ movq($mem$$Address, $src$$Register);
8045 %}
8046 ins_pipe(ialu_mem_reg);
8047 %}
8048
8049 instruct storeImmP0(memory mem, immP0 zero)
8050 %{
8051 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8052 match(Set mem (StoreP mem zero));
8053
8054 ins_cost(125); // XXX
8055 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8056 ins_encode %{
8057 __ movq($mem$$Address, r12);
8058 %}
8059 ins_pipe(ialu_mem_reg);
8060 %}
8061
8062 // Store Null Pointer, mark word, or other simple pointer constant.
8063 instruct storeImmP(memory mem, immP31 src)
8064 %{
8065 predicate(n->as_Store()->barrier_data() == 0);
8066 match(Set mem (StoreP mem src));
8067
8068 ins_cost(150); // XXX
8069 format %{ "movq $mem, $src\t# ptr" %}
8070 ins_encode %{
8071 __ movq($mem$$Address, $src$$constant);
8072 %}
8073 ins_pipe(ialu_mem_imm);
8074 %}
8075
8076 // Store Compressed Pointer
8077 instruct storeN(memory mem, rRegN src)
8078 %{
8079 predicate(n->as_Store()->barrier_data() == 0);
8080 match(Set mem (StoreN mem src));
8081
8082 ins_cost(125); // XXX
8083 format %{ "movl $mem, $src\t# compressed ptr" %}
8084 ins_encode %{
8085 __ movl($mem$$Address, $src$$Register);
8086 %}
8087 ins_pipe(ialu_mem_reg);
8088 %}
8089
8090 instruct storeNKlass(memory mem, rRegN src)
8091 %{
8092 match(Set mem (StoreNKlass mem src));
8093
8094 ins_cost(125); // XXX
8095 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8096 ins_encode %{
8097 __ movl($mem$$Address, $src$$Register);
8098 %}
8099 ins_pipe(ialu_mem_reg);
8100 %}
8101
8102 instruct storeImmN0(memory mem, immN0 zero)
8103 %{
8104 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8105 match(Set mem (StoreN mem zero));
8106
8107 ins_cost(125); // XXX
8108 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8109 ins_encode %{
8110 __ movl($mem$$Address, r12);
8111 %}
8112 ins_pipe(ialu_mem_reg);
8113 %}
8114
8115 instruct storeImmN(memory mem, immN src)
8116 %{
8117 predicate(n->as_Store()->barrier_data() == 0);
8118 match(Set mem (StoreN mem src));
8119
8120 ins_cost(150); // XXX
8121 format %{ "movl $mem, $src\t# compressed ptr" %}
8122 ins_encode %{
8123 address con = (address)$src$$constant;
8124 if (con == nullptr) {
8125 __ movl($mem$$Address, 0);
8126 } else {
8127 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8128 }
8129 %}
8130 ins_pipe(ialu_mem_imm);
8131 %}
8132
8133 instruct storeImmNKlass(memory mem, immNKlass src)
8134 %{
8135 match(Set mem (StoreNKlass mem src));
8136
8137 ins_cost(150); // XXX
8138 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8139 ins_encode %{
8140 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8141 %}
8142 ins_pipe(ialu_mem_imm);
8143 %}
8144
8145 // Store Integer Immediate
8146 instruct storeImmI0(memory mem, immI_0 zero)
8147 %{
8148 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8149 match(Set mem (StoreI mem zero));
8150
8151 ins_cost(125); // XXX
8152 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8153 ins_encode %{
8154 __ movl($mem$$Address, r12);
8155 %}
8156 ins_pipe(ialu_mem_reg);
8157 %}
8158
8159 instruct storeImmI(memory mem, immI src)
8160 %{
8161 match(Set mem (StoreI mem src));
8162
8163 ins_cost(150);
8164 format %{ "movl $mem, $src\t# int" %}
8165 ins_encode %{
8166 __ movl($mem$$Address, $src$$constant);
8167 %}
8168 ins_pipe(ialu_mem_imm);
8169 %}
8170
8171 // Store Long Immediate
8172 instruct storeImmL0(memory mem, immL0 zero)
8173 %{
8174 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8175 match(Set mem (StoreL mem zero));
8176
8177 ins_cost(125); // XXX
8178 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8179 ins_encode %{
8180 __ movq($mem$$Address, r12);
8181 %}
8182 ins_pipe(ialu_mem_reg);
8183 %}
8184
8185 instruct storeImmL(memory mem, immL32 src)
8186 %{
8187 match(Set mem (StoreL mem src));
8188
8189 ins_cost(150);
8190 format %{ "movq $mem, $src\t# long" %}
8191 ins_encode %{
8192 __ movq($mem$$Address, $src$$constant);
8193 %}
8194 ins_pipe(ialu_mem_imm);
8195 %}
8196
8197 // Store Short/Char Immediate
8198 instruct storeImmC0(memory mem, immI_0 zero)
8199 %{
8200 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8201 match(Set mem (StoreC mem zero));
8202
8203 ins_cost(125); // XXX
8204 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8205 ins_encode %{
8206 __ movw($mem$$Address, r12);
8207 %}
8208 ins_pipe(ialu_mem_reg);
8209 %}
8210
8211 instruct storeImmI16(memory mem, immI16 src)
8212 %{
8213 predicate(UseStoreImmI16);
8214 match(Set mem (StoreC mem src));
8215
8216 ins_cost(150);
8217 format %{ "movw $mem, $src\t# short/char" %}
8218 ins_encode %{
8219 __ movw($mem$$Address, $src$$constant);
8220 %}
8221 ins_pipe(ialu_mem_imm);
8222 %}
8223
8224 // Store Byte Immediate
8225 instruct storeImmB0(memory mem, immI_0 zero)
8226 %{
8227 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8228 match(Set mem (StoreB mem zero));
8229
8230 ins_cost(125); // XXX
format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8232 ins_encode %{
8233 __ movb($mem$$Address, r12);
8234 %}
8235 ins_pipe(ialu_mem_reg);
8236 %}
8237
8238 instruct storeImmB(memory mem, immI8 src)
8239 %{
8240 match(Set mem (StoreB mem src));
8241
8242 ins_cost(150); // XXX
8243 format %{ "movb $mem, $src\t# byte" %}
8244 ins_encode %{
8245 __ movb($mem$$Address, $src$$constant);
8246 %}
8247 ins_pipe(ialu_mem_imm);
8248 %}
8249
8250 // Store Float
8251 instruct storeF(memory mem, regF src)
8252 %{
8253 match(Set mem (StoreF mem src));
8254
8255 ins_cost(95); // XXX
8256 format %{ "movss $mem, $src\t# float" %}
8257 ins_encode %{
8258 __ movflt($mem$$Address, $src$$XMMRegister);
8259 %}
8260 ins_pipe(pipe_slow); // XXX
8261 %}
8262
8263 // Store immediate Float value (it is faster than store from XMM register)
8264 instruct storeF0(memory mem, immF0 zero)
8265 %{
8266 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8267 match(Set mem (StoreF mem zero));
8268
8269 ins_cost(25); // XXX
8270 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8271 ins_encode %{
8272 __ movl($mem$$Address, r12);
8273 %}
8274 ins_pipe(ialu_mem_reg);
8275 %}
8276
8277 instruct storeF_imm(memory mem, immF src)
8278 %{
8279 match(Set mem (StoreF mem src));
8280
8281 ins_cost(50);
8282 format %{ "movl $mem, $src\t# float" %}
8283 ins_encode %{
8284 __ movl($mem$$Address, jint_cast($src$$constant));
8285 %}
8286 ins_pipe(ialu_mem_imm);
8287 %}
8288
8289 // Store Double
8290 instruct storeD(memory mem, regD src)
8291 %{
8292 match(Set mem (StoreD mem src));
8293
8294 ins_cost(95); // XXX
8295 format %{ "movsd $mem, $src\t# double" %}
8296 ins_encode %{
8297 __ movdbl($mem$$Address, $src$$XMMRegister);
8298 %}
8299 ins_pipe(pipe_slow); // XXX
8300 %}
8301
8302 // Store immediate double 0.0 (it is faster than store from XMM register)
8303 instruct storeD0_imm(memory mem, immD0 src)
8304 %{
8305 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8306 match(Set mem (StoreD mem src));
8307
8308 ins_cost(50);
8309 format %{ "movq $mem, $src\t# double 0." %}
8310 ins_encode %{
8311 __ movq($mem$$Address, $src$$constant);
8312 %}
8313 ins_pipe(ialu_mem_imm);
8314 %}
8315
8316 instruct storeD0(memory mem, immD0 zero)
8317 %{
8318 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8319 match(Set mem (StoreD mem zero));
8320
8321 ins_cost(25); // XXX
8322 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8323 ins_encode %{
8324 __ movq($mem$$Address, r12);
8325 %}
8326 ins_pipe(ialu_mem_reg);
8327 %}
8328
8329 instruct storeSSI(stackSlotI dst, rRegI src)
8330 %{
8331 match(Set dst src);
8332
8333 ins_cost(100);
8334 format %{ "movl $dst, $src\t# int stk" %}
8335 ins_encode %{
8336 __ movl($dst$$Address, $src$$Register);
8337 %}
8338 ins_pipe( ialu_mem_reg );
8339 %}
8340
8341 instruct storeSSL(stackSlotL dst, rRegL src)
8342 %{
8343 match(Set dst src);
8344
8345 ins_cost(100);
8346 format %{ "movq $dst, $src\t# long stk" %}
8347 ins_encode %{
8348 __ movq($dst$$Address, $src$$Register);
8349 %}
8350 ins_pipe(ialu_mem_reg);
8351 %}
8352
8353 instruct storeSSP(stackSlotP dst, rRegP src)
8354 %{
8355 match(Set dst src);
8356
8357 ins_cost(100);
8358 format %{ "movq $dst, $src\t# ptr stk" %}
8359 ins_encode %{
8360 __ movq($dst$$Address, $src$$Register);
8361 %}
8362 ins_pipe(ialu_mem_reg);
8363 %}
8364
8365 instruct storeSSF(stackSlotF dst, regF src)
8366 %{
8367 match(Set dst src);
8368
8369 ins_cost(95); // XXX
8370 format %{ "movss $dst, $src\t# float stk" %}
8371 ins_encode %{
8372 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8373 %}
8374 ins_pipe(pipe_slow); // XXX
8375 %}
8376
8377 instruct storeSSD(stackSlotD dst, regD src)
8378 %{
8379 match(Set dst src);
8380
8381 ins_cost(95); // XXX
8382 format %{ "movsd $dst, $src\t# double stk" %}
8383 ins_encode %{
8384 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8385 %}
8386 ins_pipe(pipe_slow); // XXX
8387 %}
8388
8389 instruct cacheWB(indirect addr)
8390 %{
8391 predicate(VM_Version::supports_data_cache_line_flush());
8392 match(CacheWB addr);
8393
8394 ins_cost(100);
8395 format %{"cache wb $addr" %}
8396 ins_encode %{
8397 assert($addr->index_position() < 0, "should be");
8398 assert($addr$$disp == 0, "should be");
8399 __ cache_wb(Address($addr$$base$$Register, 0));
8400 %}
8401 ins_pipe(pipe_slow); // XXX
8402 %}
8403
8404 instruct cacheWBPreSync()
8405 %{
8406 predicate(VM_Version::supports_data_cache_line_flush());
8407 match(CacheWBPreSync);
8408
8409 ins_cost(100);
8410 format %{"cache wb presync" %}
8411 ins_encode %{
8412 __ cache_wbsync(true);
8413 %}
8414 ins_pipe(pipe_slow); // XXX
8415 %}
8416
8417 instruct cacheWBPostSync()
8418 %{
8419 predicate(VM_Version::supports_data_cache_line_flush());
8420 match(CacheWBPostSync);
8421
8422 ins_cost(100);
8423 format %{"cache wb postsync" %}
8424 ins_encode %{
8425 __ cache_wbsync(false);
8426 %}
8427 ins_pipe(pipe_slow); // XXX
8428 %}
8429
8430 //----------BSWAP Instructions-------------------------------------------------
8431 instruct bytes_reverse_int(rRegI dst) %{
8432 match(Set dst (ReverseBytesI dst));
8433
8434 format %{ "bswapl $dst" %}
8435 ins_encode %{
8436 __ bswapl($dst$$Register);
8437 %}
8438 ins_pipe( ialu_reg );
8439 %}
8440
8441 instruct bytes_reverse_long(rRegL dst) %{
8442 match(Set dst (ReverseBytesL dst));
8443
8444 format %{ "bswapq $dst" %}
8445 ins_encode %{
8446 __ bswapq($dst$$Register);
8447 %}
8448 ins_pipe( ialu_reg);
8449 %}
8450
8451 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8452 match(Set dst (ReverseBytesUS dst));
8453 effect(KILL cr);
8454
format %{ "bswapl $dst\n\t"
"shrl $dst, 16" %}
8457 ins_encode %{
8458 __ bswapl($dst$$Register);
8459 __ shrl($dst$$Register, 16);
8460 %}
8461 ins_pipe( ialu_reg );
8462 %}
8463
8464 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8465 match(Set dst (ReverseBytesS dst));
8466 effect(KILL cr);
8467
format %{ "bswapl $dst\n\t"
"sarl $dst, 16" %}
8470 ins_encode %{
8471 __ bswapl($dst$$Register);
8472 __ sarl($dst$$Register, 16);
8473 %}
8474 ins_pipe( ialu_reg );
8475 %}
8476
8477 //---------- Zeros Count Instructions ------------------------------------------
8478
8479 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8480 predicate(UseCountLeadingZerosInstruction);
8481 match(Set dst (CountLeadingZerosI src));
8482 effect(KILL cr);
8483
8484 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8485 ins_encode %{
8486 __ lzcntl($dst$$Register, $src$$Register);
8487 %}
8488 ins_pipe(ialu_reg);
8489 %}
8490
8491 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8492 predicate(UseCountLeadingZerosInstruction);
8493 match(Set dst (CountLeadingZerosI (LoadI src)));
8494 effect(KILL cr);
8495 ins_cost(175);
8496 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8497 ins_encode %{
8498 __ lzcntl($dst$$Register, $src$$Address);
8499 %}
8500 ins_pipe(ialu_reg_mem);
8501 %}
8502
8503 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8504 predicate(!UseCountLeadingZerosInstruction);
8505 match(Set dst (CountLeadingZerosI src));
8506 effect(KILL cr);
8507
8508 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8509 "jnz skip\n\t"
8510 "movl $dst, -1\n"
8511 "skip:\n\t"
8512 "negl $dst\n\t"
8513 "addl $dst, 31" %}
8514 ins_encode %{
8515 Register Rdst = $dst$$Register;
8516 Register Rsrc = $src$$Register;
8517 Label skip;
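// bsrl sets ZF and leaves Rdst undefined when Rsrc == 0, so -1 is
// substituted for that case. The shared tail computes clz = (-index) + 31,
// i.e. 31 - index, which yields 32 for a zero input.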
8518 __ bsrl(Rdst, Rsrc);
8519 __ jccb(Assembler::notZero, skip);
8520 __ movl(Rdst, -1);
8521 __ bind(skip);
8522 __ negl(Rdst);
8523 __ addl(Rdst, BitsPerInt - 1);
8524 %}
8525 ins_pipe(ialu_reg);
8526 %}
8527
8528 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8529 predicate(UseCountLeadingZerosInstruction);
8530 match(Set dst (CountLeadingZerosL src));
8531 effect(KILL cr);
8532
8533 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8534 ins_encode %{
8535 __ lzcntq($dst$$Register, $src$$Register);
8536 %}
8537 ins_pipe(ialu_reg);
8538 %}
8539
8540 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8541 predicate(UseCountLeadingZerosInstruction);
8542 match(Set dst (CountLeadingZerosL (LoadL src)));
8543 effect(KILL cr);
8544 ins_cost(175);
8545 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8546 ins_encode %{
8547 __ lzcntq($dst$$Register, $src$$Address);
8548 %}
8549 ins_pipe(ialu_reg_mem);
8550 %}
8551
8552 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8553 predicate(!UseCountLeadingZerosInstruction);
8554 match(Set dst (CountLeadingZerosL src));
8555 effect(KILL cr);
8556
8557 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8558 "jnz skip\n\t"
8559 "movl $dst, -1\n"
8560 "skip:\n\t"
8561 "negl $dst\n\t"
8562 "addl $dst, 63" %}
8563 ins_encode %{
8564 Register Rdst = $dst$$Register;
8565 Register Rsrc = $src$$Register;
8566 Label skip;
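// Same scheme as the 32-bit case: clz = 63 - index via negl/addl, with a
// zero source mapped to -1 so the result is 64.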
8567 __ bsrq(Rdst, Rsrc);
8568 __ jccb(Assembler::notZero, skip);
8569 __ movl(Rdst, -1);
8570 __ bind(skip);
8571 __ negl(Rdst);
8572 __ addl(Rdst, BitsPerLong - 1);
8573 %}
8574 ins_pipe(ialu_reg);
8575 %}
8576
8577 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8578 predicate(UseCountTrailingZerosInstruction);
8579 match(Set dst (CountTrailingZerosI src));
8580 effect(KILL cr);
8581
8582 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8583 ins_encode %{
8584 __ tzcntl($dst$$Register, $src$$Register);
8585 %}
8586 ins_pipe(ialu_reg);
8587 %}
8588
8589 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8590 predicate(UseCountTrailingZerosInstruction);
8591 match(Set dst (CountTrailingZerosI (LoadI src)));
8592 effect(KILL cr);
8593 ins_cost(175);
8594 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8595 ins_encode %{
8596 __ tzcntl($dst$$Register, $src$$Address);
8597 %}
8598 ins_pipe(ialu_reg_mem);
8599 %}
8600
8601 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8602 predicate(!UseCountTrailingZerosInstruction);
8603 match(Set dst (CountTrailingZerosI src));
8604 effect(KILL cr);
8605
8606 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8607 "jnz done\n\t"
8608 "movl $dst, 32\n"
8609 "done:" %}
8610 ins_encode %{
8611 Register Rdst = $dst$$Register;
8612 Label done;
8613 __ bsfl(Rdst, $src$$Register);
8614 __ jccb(Assembler::notZero, done);
8615 __ movl(Rdst, BitsPerInt);
8616 __ bind(done);
8617 %}
8618 ins_pipe(ialu_reg);
8619 %}
8620
8621 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8622 predicate(UseCountTrailingZerosInstruction);
8623 match(Set dst (CountTrailingZerosL src));
8624 effect(KILL cr);
8625
8626 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8627 ins_encode %{
8628 __ tzcntq($dst$$Register, $src$$Register);
8629 %}
8630 ins_pipe(ialu_reg);
8631 %}
8632
8633 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8634 predicate(UseCountTrailingZerosInstruction);
8635 match(Set dst (CountTrailingZerosL (LoadL src)));
8636 effect(KILL cr);
8637 ins_cost(175);
8638 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8639 ins_encode %{
8640 __ tzcntq($dst$$Register, $src$$Address);
8641 %}
8642 ins_pipe(ialu_reg_mem);
8643 %}
8644
8645 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8646 predicate(!UseCountTrailingZerosInstruction);
8647 match(Set dst (CountTrailingZerosL src));
8648 effect(KILL cr);
8649
8650 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8651 "jnz done\n\t"
8652 "movl $dst, 64\n"
8653 "done:" %}
8654 ins_encode %{
8655 Register Rdst = $dst$$Register;
8656 Label done;
8657 __ bsfq(Rdst, $src$$Register);
8658 __ jccb(Assembler::notZero, done);
8659 __ movl(Rdst, BitsPerLong);
8660 __ bind(done);
8661 %}
8662 ins_pipe(ialu_reg);
8663 %}
8664
8665 //--------------- Reverse Operation Instructions ----------------
8666 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8667 predicate(!VM_Version::supports_gfni());
8668 match(Set dst (ReverseI src));
8669 effect(TEMP dst, TEMP rtmp, KILL cr);
8670 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8671 ins_encode %{
8672 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8673 %}
8674 ins_pipe( ialu_reg );
8675 %}
8676
8677 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8678 predicate(VM_Version::supports_gfni());
8679 match(Set dst (ReverseI src));
8680 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8681 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8682 ins_encode %{
8683 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8684 %}
8685 ins_pipe( ialu_reg );
8686 %}
8687
8688 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8689 predicate(!VM_Version::supports_gfni());
8690 match(Set dst (ReverseL src));
8691 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8692 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8693 ins_encode %{
8694 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8695 %}
8696 ins_pipe( ialu_reg );
8697 %}
8698
8699 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8700 predicate(VM_Version::supports_gfni());
8701 match(Set dst (ReverseL src));
8702 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8703 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8704 ins_encode %{
8705 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8706 %}
8707 ins_pipe( ialu_reg );
8708 %}
8709
8710 //---------- Population Count Instructions -------------------------------------
8711
8712 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8713 predicate(UsePopCountInstruction);
8714 match(Set dst (PopCountI src));
8715 effect(KILL cr);
8716
8717 format %{ "popcnt $dst, $src" %}
8718 ins_encode %{
8719 __ popcntl($dst$$Register, $src$$Register);
8720 %}
8721 ins_pipe(ialu_reg);
8722 %}
8723
8724 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8725 predicate(UsePopCountInstruction);
8726 match(Set dst (PopCountI (LoadI mem)));
8727 effect(KILL cr);
8728
8729 format %{ "popcnt $dst, $mem" %}
8730 ins_encode %{
8731 __ popcntl($dst$$Register, $mem$$Address);
8732 %}
8733 ins_pipe(ialu_reg);
8734 %}
8735
8736 // Note: Long.bitCount(long) returns an int.
8737 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8738 predicate(UsePopCountInstruction);
8739 match(Set dst (PopCountL src));
8740 effect(KILL cr);
8741
8742 format %{ "popcnt $dst, $src" %}
8743 ins_encode %{
8744 __ popcntq($dst$$Register, $src$$Register);
8745 %}
8746 ins_pipe(ialu_reg);
8747 %}
8748
8749 // Note: Long.bitCount(long) returns an int.
8750 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8751 predicate(UsePopCountInstruction);
8752 match(Set dst (PopCountL (LoadL mem)));
8753 effect(KILL cr);
8754
8755 format %{ "popcnt $dst, $mem" %}
8756 ins_encode %{
8757 __ popcntq($dst$$Register, $mem$$Address);
8758 %}
8759 ins_pipe(ialu_reg);
8760 %}
8761
8762
8763 //----------MemBar Instructions-----------------------------------------------
8764 // Memory barrier flavors
8765
8766 instruct membar_acquire()
8767 %{
8768 match(MemBarAcquire);
8769 match(LoadFence);
8770 ins_cost(0);
8771
8772 size(0);
8773 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8774 ins_encode();
8775 ins_pipe(empty);
8776 %}
8777
8778 instruct membar_acquire_lock()
8779 %{
8780 match(MemBarAcquireLock);
8781 ins_cost(0);
8782
8783 size(0);
8784 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8785 ins_encode();
8786 ins_pipe(empty);
8787 %}
8788
8789 instruct membar_release()
8790 %{
8791 match(MemBarRelease);
8792 match(StoreFence);
8793 ins_cost(0);
8794
8795 size(0);
8796 format %{ "MEMBAR-release ! (empty encoding)" %}
8797 ins_encode();
8798 ins_pipe(empty);
8799 %}
8800
8801 instruct membar_release_lock()
8802 %{
8803 match(MemBarReleaseLock);
8804 ins_cost(0);
8805
8806 size(0);
8807 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8808 ins_encode();
8809 ins_pipe(empty);
8810 %}
8811
8812 instruct membar_volatile(rFlagsReg cr) %{
8813 match(MemBarVolatile);
8814 effect(KILL cr);
8815 ins_cost(400);
8816
8817 format %{
8818 $$template
8819 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8820 %}
8821 ins_encode %{
8822 __ membar(Assembler::StoreLoad);
8823 %}
8824 ins_pipe(pipe_slow);
8825 %}
8826
8827 instruct unnecessary_membar_volatile()
8828 %{
8829 match(MemBarVolatile);
8830 predicate(Matcher::post_store_load_barrier(n));
8831 ins_cost(0);
8832
8833 size(0);
8834 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8835 ins_encode();
8836 ins_pipe(empty);
8837 %}
8838
8839 instruct membar_storestore() %{
8840 match(MemBarStoreStore);
8841 match(StoreStoreFence);
8842 ins_cost(0);
8843
8844 size(0);
8845 format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
8847 ins_pipe(empty);
8848 %}
8849
8850 //----------Move Instructions--------------------------------------------------
8851
8852 instruct castX2P(rRegP dst, rRegL src)
8853 %{
8854 match(Set dst (CastX2P src));
8855
8856 format %{ "movq $dst, $src\t# long->ptr" %}
8857 ins_encode %{
8858 if ($dst$$reg != $src$$reg) {
8859 __ movptr($dst$$Register, $src$$Register);
8860 }
8861 %}
8862 ins_pipe(ialu_reg_reg); // XXX
8863 %}
8864
8865 instruct castP2X(rRegL dst, rRegP src)
8866 %{
8867 match(Set dst (CastP2X src));
8868
8869 format %{ "movq $dst, $src\t# ptr -> long" %}
8870 ins_encode %{
8871 if ($dst$$reg != $src$$reg) {
8872 __ movptr($dst$$Register, $src$$Register);
8873 }
8874 %}
8875 ins_pipe(ialu_reg_reg); // XXX
8876 %}
8877
// Convert oop into int for vector alignment masking
8879 instruct convP2I(rRegI dst, rRegP src)
8880 %{
8881 match(Set dst (ConvL2I (CastP2X src)));
8882
8883 format %{ "movl $dst, $src\t# ptr -> int" %}
8884 ins_encode %{
8885 __ movl($dst$$Register, $src$$Register);
8886 %}
8887 ins_pipe(ialu_reg_reg); // XXX
8888 %}
8889
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4GB).
8892 instruct convN2I(rRegI dst, rRegN src)
8893 %{
8894 predicate(CompressedOops::shift() == 0);
8895 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8896
8897 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8898 ins_encode %{
8899 __ movl($dst$$Register, $src$$Register);
8900 %}
8901 ins_pipe(ialu_reg_reg); // XXX
8902 %}
8903
8904 // Convert oop pointer into compressed form
8905 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8906 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8907 match(Set dst (EncodeP src));
8908 effect(KILL cr);
8909 format %{ "encode_heap_oop $dst,$src" %}
8910 ins_encode %{
8911 Register s = $src$$Register;
8912 Register d = $dst$$Register;
8913 if (s != d) {
8914 __ movq(d, s);
8915 }
8916 __ encode_heap_oop(d);
8917 %}
8918 ins_pipe(ialu_reg_long);
8919 %}
8920
8921 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8922 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8923 match(Set dst (EncodeP src));
8924 effect(KILL cr);
8925 format %{ "encode_heap_oop_not_null $dst,$src" %}
8926 ins_encode %{
8927 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8928 %}
8929 ins_pipe(ialu_reg_long);
8930 %}
8931
8932 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8933 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8934 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8935 match(Set dst (DecodeN src));
8936 effect(KILL cr);
8937 format %{ "decode_heap_oop $dst,$src" %}
8938 ins_encode %{
8939 Register s = $src$$Register;
8940 Register d = $dst$$Register;
8941 if (s != d) {
8942 __ movq(d, s);
8943 }
8944 __ decode_heap_oop(d);
8945 %}
8946 ins_pipe(ialu_reg_long);
8947 %}
8948
8949 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8950 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8951 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8952 match(Set dst (DecodeN src));
8953 effect(KILL cr);
8954 format %{ "decode_heap_oop_not_null $dst,$src" %}
8955 ins_encode %{
8956 Register s = $src$$Register;
8957 Register d = $dst$$Register;
8958 if (s != d) {
8959 __ decode_heap_oop_not_null(d, s);
8960 } else {
8961 __ decode_heap_oop_not_null(d);
8962 }
8963 %}
8964 ins_pipe(ialu_reg_long);
8965 %}
8966
8967 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8968 match(Set dst (EncodePKlass src));
8969 effect(TEMP dst, KILL cr);
8970 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8971 ins_encode %{
8972 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8973 %}
8974 ins_pipe(ialu_reg_long);
8975 %}
8976
8977 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8978 match(Set dst (DecodeNKlass src));
8979 effect(TEMP dst, KILL cr);
8980 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8981 ins_encode %{
8982 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8983 %}
8984 ins_pipe(ialu_reg_long);
8985 %}
8986
8987 //----------Conditional Move---------------------------------------------------
// Jump (switch-table dispatch)
// Dummy instruction for generating temp registers (predicate(false), so it
// is never matched directly).
8990 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8991 match(Jump (LShiftL switch_val shift));
8992 ins_cost(350);
8993 predicate(false);
8994 effect(TEMP dest);
8995
8996 format %{ "leaq $dest, [$constantaddress]\n\t"
8997 "jmp [$dest + $switch_val << $shift]\n\t" %}
8998 ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that, and the compiler is using that register as one it can allocate.
    // So we build the dispatch by hand.
9002 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9003 // ArrayAddress dispatch(table, index);
9004 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9005 __ lea($dest$$Register, $constantaddress);
9006 __ jmp(dispatch);
9007 %}
9008 ins_pipe(pipe_jmp);
9009 %}
9010
9011 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9012 match(Jump (AddL (LShiftL switch_val shift) offset));
9013 ins_cost(350);
9014 effect(TEMP dest);
9015
9016 format %{ "leaq $dest, [$constantaddress]\n\t"
9017 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9018 ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that, and the compiler is using that register as one it can allocate.
    // So we build the dispatch by hand.
9022 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9023 // ArrayAddress dispatch(table, index);
9024 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9025 __ lea($dest$$Register, $constantaddress);
9026 __ jmp(dispatch);
9027 %}
9028 ins_pipe(pipe_jmp);
9029 %}
9030
9031 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9032 match(Jump switch_val);
9033 ins_cost(350);
9034 effect(TEMP dest);
9035
9036 format %{ "leaq $dest, [$constantaddress]\n\t"
9037 "jmp [$dest + $switch_val]\n\t" %}
9038 ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that, and the compiler is using that register as one it can allocate.
    // So we build the dispatch by hand.
9042 // Address index(noreg, switch_reg, Address::times_1);
9043 // ArrayAddress dispatch(table, index);
9044 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9045 __ lea($dest$$Register, $constantaddress);
9046 __ jmp(dispatch);
9047 %}
9048 ins_pipe(pipe_jmp);
9049 %}
9050
9051 // Conditional move
9052 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9053 %{
9054 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9055 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9056
9057 ins_cost(100); // XXX
9058 format %{ "setbn$cop $dst\t# signed, int" %}
9059 ins_encode %{
9060 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9061 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9062 %}
9063 ins_pipe(ialu_reg);
9064 %}
9065
9066 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9067 %{
9068 predicate(!UseAPX);
9069 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9070
9071 ins_cost(200); // XXX
9072 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9073 ins_encode %{
9074 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9075 %}
9076 ins_pipe(pipe_cmov_reg);
9077 %}
9078
9079 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9080 %{
9081 predicate(UseAPX);
9082 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9083
9084 ins_cost(200);
9085 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9086 ins_encode %{
9087 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9088 %}
9089 ins_pipe(pipe_cmov_reg);
9090 %}
9091
9092 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9093 %{
9094 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9095 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9096
9097 ins_cost(100); // XXX
9098 format %{ "setbn$cop $dst\t# unsigned, int" %}
9099 ins_encode %{
9100 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9101 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9102 %}
9103 ins_pipe(ialu_reg);
9104 %}
9105
9106 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9107 predicate(!UseAPX);
9108 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9109
9110 ins_cost(200); // XXX
9111 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9112 ins_encode %{
9113 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9114 %}
9115 ins_pipe(pipe_cmov_reg);
9116 %}
9117
9118 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9119 predicate(UseAPX);
9120 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9121
9122 ins_cost(200);
9123 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9124 ins_encode %{
9125 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9126 %}
9127 ins_pipe(pipe_cmov_reg);
9128 %}
9129
9130 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9131 %{
9132 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9133 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9134
9135 ins_cost(100); // XXX
9136 format %{ "setbn$cop $dst\t# unsigned, int" %}
9137 ins_encode %{
9138 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9139 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9140 %}
9141 ins_pipe(ialu_reg);
9142 %}
9143
9144 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9145 predicate(!UseAPX);
9146 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9147 ins_cost(200);
9148 expand %{
9149 cmovI_regU(cop, cr, dst, src);
9150 %}
9151 %}
9152
9153 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9154 predicate(UseAPX);
9155 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9156 ins_cost(200);
9157 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9158 ins_encode %{
9159 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9160 %}
9161 ins_pipe(pipe_cmov_reg);
9162 %}
9163
9164 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9165 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9166 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9167
9168 ins_cost(200); // XXX
9169 format %{ "cmovpl $dst, $src\n\t"
9170 "cmovnel $dst, $src" %}
9171 ins_encode %{
9172 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9173 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9174 %}
9175 ins_pipe(pipe_cmov_reg);
9176 %}
9177
9178 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9179 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9180 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9181 effect(TEMP dst);
9182
9183 ins_cost(200);
9184 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9185 "cmovnel $dst, $src2" %}
9186 ins_encode %{
9187 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9188 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9189 %}
9190 ins_pipe(pipe_cmov_reg);
9191 %}
9192
9193 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9194 // inputs of the CMove
9195 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9196 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9197 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9198 effect(TEMP dst);
9199
9200 ins_cost(200); // XXX
9201 format %{ "cmovpl $dst, $src\n\t"
9202 "cmovnel $dst, $src" %}
9203 ins_encode %{
9204 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9205 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9206 %}
9207 ins_pipe(pipe_cmov_reg);
9208 %}
9209
// We need this special handling only for eq / neq comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
9212 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9213 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9214 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9215 effect(TEMP dst);
9216
9217 ins_cost(200);
9218 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9219 "cmovnel $dst, $src2" %}
9220 ins_encode %{
9221 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9222 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9223 %}
9224 ins_pipe(pipe_cmov_reg);
9225 %}
9226
9227 // Conditional move
9228 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9229 predicate(!UseAPX);
9230 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9231
9232 ins_cost(250); // XXX
9233 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9234 ins_encode %{
9235 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9236 %}
9237 ins_pipe(pipe_cmov_mem);
9238 %}
9239
9240 // Conditional move
9241 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9242 %{
9243 predicate(UseAPX);
9244 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9245
9246 ins_cost(250);
9247 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9248 ins_encode %{
9249 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9250 %}
9251 ins_pipe(pipe_cmov_mem);
9252 %}
9253
9254 // Conditional move
9255 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9256 %{
9257 predicate(!UseAPX);
9258 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9259
9260 ins_cost(250); // XXX
9261 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9262 ins_encode %{
9263 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9264 %}
9265 ins_pipe(pipe_cmov_mem);
9266 %}
9267
9268 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9269 predicate(!UseAPX);
9270 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9271 ins_cost(250);
9272 expand %{
9273 cmovI_memU(cop, cr, dst, src);
9274 %}
9275 %}
9276
9277 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9278 %{
9279 predicate(UseAPX);
9280 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9281
9282 ins_cost(250);
9283 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9284 ins_encode %{
9285 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9286 %}
9287 ins_pipe(pipe_cmov_mem);
9288 %}
9289
9290 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9291 %{
9292 predicate(UseAPX);
9293 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9294 ins_cost(250);
9295 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9296 ins_encode %{
9297 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9298 %}
9299 ins_pipe(pipe_cmov_mem);
9300 %}
9301
9302 // Conditional move
9303 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9304 %{
9305 predicate(!UseAPX);
9306 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9307
9308 ins_cost(200); // XXX
9309 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9310 ins_encode %{
9311 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9312 %}
9313 ins_pipe(pipe_cmov_reg);
9314 %}
9315
9316 // Conditional move ndd
9317 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9318 %{
9319 predicate(UseAPX);
9320 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9321
9322 ins_cost(200);
9323 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9324 ins_encode %{
9325 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9326 %}
9327 ins_pipe(pipe_cmov_reg);
9328 %}
9329
9330 // Conditional move
9331 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9332 %{
9333 predicate(!UseAPX);
9334 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9335
9336 ins_cost(200); // XXX
9337 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9338 ins_encode %{
9339 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9340 %}
9341 ins_pipe(pipe_cmov_reg);
9342 %}
9343
9344 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9345 predicate(!UseAPX);
9346 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9347 ins_cost(200);
9348 expand %{
9349 cmovN_regU(cop, cr, dst, src);
9350 %}
9351 %}
9352
9353 // Conditional move ndd
9354 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9355 %{
9356 predicate(UseAPX);
9357 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9358
9359 ins_cost(200);
9360 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9361 ins_encode %{
9362 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9363 %}
9364 ins_pipe(pipe_cmov_reg);
9365 %}
9366
9367 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9368 predicate(UseAPX);
9369 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9370 ins_cost(200);
9371 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9372 ins_encode %{
9373 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9374 %}
9375 ins_pipe(pipe_cmov_reg);
9376 %}
9377
9378 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9379 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9380 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9381
9382 ins_cost(200); // XXX
9383 format %{ "cmovpl $dst, $src\n\t"
9384 "cmovnel $dst, $src" %}
9385 ins_encode %{
9386 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9387 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9388 %}
9389 ins_pipe(pipe_cmov_reg);
9390 %}
9391
9392 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9393 // inputs of the CMove
9394 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9395 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9396 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9397
9398 ins_cost(200); // XXX
9399 format %{ "cmovpl $dst, $src\n\t"
9400 "cmovnel $dst, $src" %}
9401 ins_encode %{
9402 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9403 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9404 %}
9405 ins_pipe(pipe_cmov_reg);
9406 %}
9407
9408 // Conditional move
9409 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9410 %{
9411 predicate(!UseAPX);
9412 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9413
9414 ins_cost(200); // XXX
9415 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9416 ins_encode %{
9417 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9418 %}
9419 ins_pipe(pipe_cmov_reg); // XXX
9420 %}
9421
9422 // Conditional move ndd
9423 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9424 %{
9425 predicate(UseAPX);
9426 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9427
9428 ins_cost(200);
9429 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9430 ins_encode %{
9431 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9432 %}
9433 ins_pipe(pipe_cmov_reg);
9434 %}
9435
9436 // Conditional move
9437 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9438 %{
9439 predicate(!UseAPX);
9440 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9441
9442 ins_cost(200); // XXX
9443 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9444 ins_encode %{
9445 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9446 %}
9447 ins_pipe(pipe_cmov_reg); // XXX
9448 %}
9449
9450 // Conditional move ndd
9451 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9452 %{
9453 predicate(UseAPX);
9454 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9455
9456 ins_cost(200);
9457 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9458 ins_encode %{
9459 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9460 %}
9461 ins_pipe(pipe_cmov_reg);
9462 %}
9463
9464 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9465 predicate(!UseAPX);
9466 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9467 ins_cost(200);
9468 expand %{
9469 cmovP_regU(cop, cr, dst, src);
9470 %}
9471 %}
9472
9473 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9474 predicate(UseAPX);
9475 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9476 ins_cost(200);
9477 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9478 ins_encode %{
9479 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9480 %}
9481 ins_pipe(pipe_cmov_reg);
9482 %}
9483
9484 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9485 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9486 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9487
9488 ins_cost(200); // XXX
9489 format %{ "cmovpq $dst, $src\n\t"
9490 "cmovneq $dst, $src" %}
9491 ins_encode %{
9492 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9493 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9494 %}
9495 ins_pipe(pipe_cmov_reg);
9496 %}
9497
9498 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9499 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9500 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9501 effect(TEMP dst);
9502
9503 ins_cost(200);
9504 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9505 "cmovneq $dst, $src2" %}
9506 ins_encode %{
9507 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9508 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9509 %}
9510 ins_pipe(pipe_cmov_reg);
9511 %}
9512
9513 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9514 // inputs of the CMove
9515 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9516 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9517 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9518
9519 ins_cost(200); // XXX
9520 format %{ "cmovpq $dst, $src\n\t"
9521 "cmovneq $dst, $src" %}
9522 ins_encode %{
9523 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9524 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9525 %}
9526 ins_pipe(pipe_cmov_reg);
9527 %}
9528
9529 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9530 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9531 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9532 effect(TEMP dst);
9533
9534 ins_cost(200);
9535 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9536 "cmovneq $dst, $src2" %}
9537 ins_encode %{
9538 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9539 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9540 %}
9541 ins_pipe(pipe_cmov_reg);
9542 %}
9543
9544 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9545 %{
9546 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9547 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9548
9549 ins_cost(100); // XXX
9550 format %{ "setbn$cop $dst\t# signed, long" %}
9551 ins_encode %{
9552 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9553 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9554 %}
9555 ins_pipe(ialu_reg);
9556 %}
9557
9558 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9559 %{
9560 predicate(!UseAPX);
9561 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9562
9563 ins_cost(200); // XXX
9564 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9565 ins_encode %{
9566 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9567 %}
9568 ins_pipe(pipe_cmov_reg); // XXX
9569 %}
9570
9571 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9572 %{
9573 predicate(UseAPX);
9574 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9575
9576 ins_cost(200);
9577 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9578 ins_encode %{
9579 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9580 %}
9581 ins_pipe(pipe_cmov_reg);
9582 %}
9583
9584 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9585 %{
9586 predicate(!UseAPX);
9587 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9588
9589 ins_cost(200); // XXX
9590 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9591 ins_encode %{
9592 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9593 %}
9594 ins_pipe(pipe_cmov_mem); // XXX
9595 %}
9596
9597 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9598 %{
9599 predicate(UseAPX);
9600 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9601
9602 ins_cost(200);
9603 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9604 ins_encode %{
9605 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9606 %}
9607 ins_pipe(pipe_cmov_mem);
9608 %}
9609
9610 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9611 %{
9612 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9613 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9614
9615 ins_cost(100); // XXX
9616 format %{ "setbn$cop $dst\t# unsigned, long" %}
9617 ins_encode %{
9618 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9619 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9620 %}
9621 ins_pipe(ialu_reg);
9622 %}
9623
9624 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9625 %{
9626 predicate(!UseAPX);
9627 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9628
9629 ins_cost(200); // XXX
9630 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9631 ins_encode %{
9632 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9633 %}
9634 ins_pipe(pipe_cmov_reg); // XXX
9635 %}
9636
9637 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9638 %{
9639 predicate(UseAPX);
9640 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9641
9642 ins_cost(200);
9643 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9644 ins_encode %{
9645 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9646 %}
9647 ins_pipe(pipe_cmov_reg);
9648 %}
9649
9650 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9651 %{
9652 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9653 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9654
9655 ins_cost(100); // XXX
9656 format %{ "setbn$cop $dst\t# unsigned, long" %}
9657 ins_encode %{
9658 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9659 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9660 %}
9661 ins_pipe(ialu_reg);
9662 %}
9663
9664 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9665 predicate(!UseAPX);
9666 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9667 ins_cost(200);
9668 expand %{
9669 cmovL_regU(cop, cr, dst, src);
9670 %}
9671 %}
9672
9673 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9674 %{
9675 predicate(UseAPX);
9676 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9677 ins_cost(200);
9678 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9679 ins_encode %{
9680 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9681 %}
9682 ins_pipe(pipe_cmov_reg);
9683 %}
9684
9685 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9686 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9687 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9688
9689 ins_cost(200); // XXX
9690 format %{ "cmovpq $dst, $src\n\t"
9691 "cmovneq $dst, $src" %}
9692 ins_encode %{
9693 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9694 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9695 %}
9696 ins_pipe(pipe_cmov_reg);
9697 %}
9698
9699 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9700 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9701 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9702 effect(TEMP dst);
9703
9704 ins_cost(200);
9705 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9706 "cmovneq $dst, $src2" %}
9707 ins_encode %{
9708 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9709 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9710 %}
9711 ins_pipe(pipe_cmov_reg);
9712 %}
9713
9714 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9715 // inputs of the CMove
9716 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9717 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9718 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9719
9720 ins_cost(200); // XXX
9721 format %{ "cmovpq $dst, $src\n\t"
9722 "cmovneq $dst, $src" %}
9723 ins_encode %{
9724 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9725 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9726 %}
9727 ins_pipe(pipe_cmov_reg);
9728 %}
9729
9730 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9731 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9732 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9733 effect(TEMP dst);
9734
9735 ins_cost(200);
9736 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9737 "cmovneq $dst, $src2" %}
9738 ins_encode %{
9739 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9740 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9741 %}
9742 ins_pipe(pipe_cmov_reg);
9743 %}
9744
9745 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9746 %{
9747 predicate(!UseAPX);
9748 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9749
9750 ins_cost(200); // XXX
9751 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9752 ins_encode %{
9753 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9754 %}
9755 ins_pipe(pipe_cmov_mem); // XXX
9756 %}
9757
9758 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9759 predicate(!UseAPX);
9760 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9761 ins_cost(200);
9762 expand %{
9763 cmovL_memU(cop, cr, dst, src);
9764 %}
9765 %}
9766
9767 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9768 %{
9769 predicate(UseAPX);
9770 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9771
9772 ins_cost(200);
9773 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9774 ins_encode %{
9775 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9776 %}
9777 ins_pipe(pipe_cmov_mem);
9778 %}
9779
9780 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9781 %{
9782 predicate(UseAPX);
9783 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9784 ins_cost(200);
9785 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9786 ins_encode %{
9787 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9788 %}
9789 ins_pipe(pipe_cmov_mem);
9790 %}
9791
9792 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9793 %{
9794 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9795
9796 ins_cost(200); // XXX
9797 format %{ "jn$cop skip\t# signed cmove float\n\t"
9798 "movss $dst, $src\n"
9799 "skip:" %}
9800 ins_encode %{
9801 Label Lskip;
9802 // Invert sense of branch from sense of CMOV
9803 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9804 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9805 __ bind(Lskip);
9806 %}
9807 ins_pipe(pipe_slow);
9808 %}
9809
9810 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9811 %{
9812 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9813
9814 ins_cost(200); // XXX
9815 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9816 "movss $dst, $src\n"
9817 "skip:" %}
9818 ins_encode %{
9819 Label Lskip;
9820 // Invert sense of branch from sense of CMOV
9821 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9822 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9823 __ bind(Lskip);
9824 %}
9825 ins_pipe(pipe_slow);
9826 %}
9827
9828 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9829 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9830 ins_cost(200);
9831 expand %{
9832 cmovF_regU(cop, cr, dst, src);
9833 %}
9834 %}
9835
9836 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9837 %{
9838 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9839
9840 ins_cost(200); // XXX
9841 format %{ "jn$cop skip\t# signed cmove double\n\t"
9842 "movsd $dst, $src\n"
9843 "skip:" %}
9844 ins_encode %{
9845 Label Lskip;
9846 // Invert sense of branch from sense of CMOV
9847 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9848 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9849 __ bind(Lskip);
9850 %}
9851 ins_pipe(pipe_slow);
9852 %}
9853
9854 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9855 %{
9856 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9857
9858 ins_cost(200); // XXX
9859 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9860 "movsd $dst, $src\n"
9861 "skip:" %}
9862 ins_encode %{
9863 Label Lskip;
9864 // Invert sense of branch from sense of CMOV
9865 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9866 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9867 __ bind(Lskip);
9868 %}
9869 ins_pipe(pipe_slow);
9870 %}
9871
9872 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9873 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9874 ins_cost(200);
9875 expand %{
9876 cmovD_regU(cop, cr, dst, src);
9877 %}
9878 %}
9879
9880 //----------Arithmetic Instructions--------------------------------------------
9881 //----------Addition Instructions----------------------------------------------
9882
9883 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9884 %{
9885 predicate(!UseAPX);
9886 match(Set dst (AddI dst src));
9887 effect(KILL cr);
9888 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9889 format %{ "addl $dst, $src\t# int" %}
9890 ins_encode %{
9891 __ addl($dst$$Register, $src$$Register);
9892 %}
9893 ins_pipe(ialu_reg_reg);
9894 %}
9895
9896 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9897 %{
9898 predicate(UseAPX);
9899 match(Set dst (AddI src1 src2));
9900 effect(KILL cr);
9901 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9902
9903 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9904 ins_encode %{
9905 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9906 %}
9907 ins_pipe(ialu_reg_reg);
9908 %}
9909
9910 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9911 %{
9912 predicate(!UseAPX);
9913 match(Set dst (AddI dst src));
9914 effect(KILL cr);
9915 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9916
9917 format %{ "addl $dst, $src\t# int" %}
9918 ins_encode %{
9919 __ addl($dst$$Register, $src$$constant);
9920 %}
9921 ins_pipe( ialu_reg );
9922 %}
9923
9924 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9925 %{
9926 predicate(UseAPX);
9927 match(Set dst (AddI src1 src2));
9928 effect(KILL cr);
9929 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9930
9931 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9932 ins_encode %{
9933 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9934 %}
9935 ins_pipe( ialu_reg );
9936 %}
9937
9938 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9939 %{
9940 predicate(UseAPX);
9941 match(Set dst (AddI (LoadI src1) src2));
9942 effect(KILL cr);
9943 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9944
9945 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9946 ins_encode %{
9947 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9948 %}
9949 ins_pipe( ialu_reg );
9950 %}
9951
9952 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9953 %{
9954 predicate(!UseAPX);
9955 match(Set dst (AddI dst (LoadI src)));
9956 effect(KILL cr);
9957 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9958
9959 ins_cost(150); // XXX
9960 format %{ "addl $dst, $src\t# int" %}
9961 ins_encode %{
9962 __ addl($dst$$Register, $src$$Address);
9963 %}
9964 ins_pipe(ialu_reg_mem);
9965 %}
9966
9967 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9968 %{
9969 predicate(UseAPX);
9970 match(Set dst (AddI src1 (LoadI src2)));
9971 effect(KILL cr);
9972 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9973
9974 ins_cost(150);
9975 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9976 ins_encode %{
9977 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9978 %}
9979 ins_pipe(ialu_reg_mem);
9980 %}
9981
9982 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9983 %{
9984 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9985 effect(KILL cr);
9986 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9987
9988 ins_cost(150); // XXX
9989 format %{ "addl $dst, $src\t# int" %}
9990 ins_encode %{
9991 __ addl($dst$$Address, $src$$Register);
9992 %}
9993 ins_pipe(ialu_mem_reg);
9994 %}
9995
9996 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9997 %{
9998 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9999 effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
10004 format %{ "addl $dst, $src\t# int" %}
10005 ins_encode %{
10006 __ addl($dst$$Address, $src$$constant);
10007 %}
10008 ins_pipe(ialu_mem_imm);
10009 %}
10010
10011 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10012 %{
10013 predicate(!UseAPX && UseIncDec);
10014 match(Set dst (AddI dst src));
10015 effect(KILL cr);
10016
10017 format %{ "incl $dst\t# int" %}
10018 ins_encode %{
10019 __ incrementl($dst$$Register);
10020 %}
10021 ins_pipe(ialu_reg);
10022 %}
10023
10024 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10025 %{
10026 predicate(UseAPX && UseIncDec);
10027 match(Set dst (AddI src val));
10028 effect(KILL cr);
10029 flag(PD::Flag_ndd_demotable_opr1);
10030
10031 format %{ "eincl $dst, $src\t# int ndd" %}
10032 ins_encode %{
10033 __ eincl($dst$$Register, $src$$Register, false);
10034 %}
10035 ins_pipe(ialu_reg);
10036 %}
10037
10038 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10039 %{
10040 predicate(UseAPX && UseIncDec);
10041 match(Set dst (AddI (LoadI src) val));
10042 effect(KILL cr);
10043
10044 format %{ "eincl $dst, $src\t# int ndd" %}
10045 ins_encode %{
10046 __ eincl($dst$$Register, $src$$Address, false);
10047 %}
10048 ins_pipe(ialu_reg);
10049 %}
10050
10051 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10052 %{
10053 predicate(UseIncDec);
10054 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10055 effect(KILL cr);
10056
10057 ins_cost(125); // XXX
10058 format %{ "incl $dst\t# int" %}
10059 ins_encode %{
10060 __ incrementl($dst$$Address);
10061 %}
10062 ins_pipe(ialu_mem_imm);
10063 %}
10064
10065 // XXX why does that use AddI
10066 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10067 %{
10068 predicate(!UseAPX && UseIncDec);
10069 match(Set dst (AddI dst src));
10070 effect(KILL cr);
10071
10072 format %{ "decl $dst\t# int" %}
10073 ins_encode %{
10074 __ decrementl($dst$$Register);
10075 %}
10076 ins_pipe(ialu_reg);
10077 %}
10078
10079 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10080 %{
10081 predicate(UseAPX && UseIncDec);
10082 match(Set dst (AddI src val));
10083 effect(KILL cr);
10084 flag(PD::Flag_ndd_demotable_opr1);
10085
10086 format %{ "edecl $dst, $src\t# int ndd" %}
10087 ins_encode %{
10088 __ edecl($dst$$Register, $src$$Register, false);
10089 %}
10090 ins_pipe(ialu_reg);
10091 %}
10092
10093 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10094 %{
10095 predicate(UseAPX && UseIncDec);
10096 match(Set dst (AddI (LoadI src) val));
10097 effect(KILL cr);
10098
10099 format %{ "edecl $dst, $src\t# int ndd" %}
10100 ins_encode %{
10101 __ edecl($dst$$Register, $src$$Address, false);
10102 %}
10103 ins_pipe(ialu_reg);
10104 %}
10105
10106 // XXX why does that use AddI
10107 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10108 %{
10109 predicate(UseIncDec);
10110 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10111 effect(KILL cr);
10112
10113 ins_cost(125); // XXX
10114 format %{ "decl $dst\t# int" %}
10115 ins_encode %{
10116 __ decrementl($dst$$Address);
10117 %}
10118 ins_pipe(ialu_mem_imm);
10119 %}
10120
10121 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10122 %{
10123 predicate(VM_Version::supports_fast_2op_lea());
10124 match(Set dst (AddI (LShiftI index scale) disp));
10125
10126 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10127 ins_encode %{
10128 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10129 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10130 %}
10131 ins_pipe(ialu_reg_reg);
10132 %}
10133
10134 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10135 %{
10136 predicate(VM_Version::supports_fast_3op_lea());
10137 match(Set dst (AddI (AddI base index) disp));
10138
10139 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10140 ins_encode %{
10141 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10142 %}
10143 ins_pipe(ialu_reg_reg);
10144 %}
10145
10146 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10147 %{
10148 predicate(VM_Version::supports_fast_2op_lea());
10149 match(Set dst (AddI base (LShiftI index scale)));
10150
10151 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10152 ins_encode %{
10153 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10154 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10155 %}
10156 ins_pipe(ialu_reg_reg);
10157 %}
10158
10159 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10160 %{
10161 predicate(VM_Version::supports_fast_3op_lea());
10162 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10163
10164 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10165 ins_encode %{
10166 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10167 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10168 %}
10169 ins_pipe(ialu_reg_reg);
10170 %}
10171
10172 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10173 %{
10174 predicate(!UseAPX);
10175 match(Set dst (AddL dst src));
10176 effect(KILL cr);
10177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10178
10179 format %{ "addq $dst, $src\t# long" %}
10180 ins_encode %{
10181 __ addq($dst$$Register, $src$$Register);
10182 %}
10183 ins_pipe(ialu_reg_reg);
10184 %}
10185
10186 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10187 %{
10188 predicate(UseAPX);
10189 match(Set dst (AddL src1 src2));
10190 effect(KILL cr);
10191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10192
10193 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10194 ins_encode %{
10195 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10196 %}
10197 ins_pipe(ialu_reg_reg);
10198 %}
10199
10200 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10201 %{
10202 predicate(!UseAPX);
10203 match(Set dst (AddL dst src));
10204 effect(KILL cr);
10205 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10206
10207 format %{ "addq $dst, $src\t# long" %}
10208 ins_encode %{
10209 __ addq($dst$$Register, $src$$constant);
10210 %}
10211 ins_pipe( ialu_reg );
10212 %}
10213
10214 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10215 %{
10216 predicate(UseAPX);
10217 match(Set dst (AddL src1 src2));
10218 effect(KILL cr);
10219 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10220
10221 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10222 ins_encode %{
10223 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10224 %}
10225 ins_pipe( ialu_reg );
10226 %}
10227
10228 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10229 %{
10230 predicate(UseAPX);
10231 match(Set dst (AddL (LoadL src1) src2));
10232 effect(KILL cr);
10233 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10234
10235 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10236 ins_encode %{
10237 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10238 %}
10239 ins_pipe( ialu_reg );
10240 %}
10241
10242 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10243 %{
10244 predicate(!UseAPX);
10245 match(Set dst (AddL dst (LoadL src)));
10246 effect(KILL cr);
10247 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10248
10249 ins_cost(150); // XXX
10250 format %{ "addq $dst, $src\t# long" %}
10251 ins_encode %{
10252 __ addq($dst$$Register, $src$$Address);
10253 %}
10254 ins_pipe(ialu_reg_mem);
10255 %}
10256
10257 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10258 %{
10259 predicate(UseAPX);
10260 match(Set dst (AddL src1 (LoadL src2)));
10261 effect(KILL cr);
10262 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10263
10264 ins_cost(150);
10265 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10266 ins_encode %{
10267 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10268 %}
10269 ins_pipe(ialu_reg_mem);
10270 %}
10271
10272 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10273 %{
10274 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10275 effect(KILL cr);
10276 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10277
10278 ins_cost(150); // XXX
10279 format %{ "addq $dst, $src\t# long" %}
10280 ins_encode %{
10281 __ addq($dst$$Address, $src$$Register);
10282 %}
10283 ins_pipe(ialu_mem_reg);
10284 %}
10285
10286 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10287 %{
10288 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10289 effect(KILL cr);
10290 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10291
10292 ins_cost(125); // XXX
10293 format %{ "addq $dst, $src\t# long" %}
10294 ins_encode %{
10295 __ addq($dst$$Address, $src$$constant);
10296 %}
10297 ins_pipe(ialu_mem_imm);
10298 %}
10299
10300 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10301 %{
10302 predicate(!UseAPX && UseIncDec);
10303 match(Set dst (AddL dst src));
10304 effect(KILL cr);
10305
10306 format %{ "incq $dst\t# long" %}
10307 ins_encode %{
10308 __ incrementq($dst$$Register);
10309 %}
10310 ins_pipe(ialu_reg);
10311 %}
10312
10313 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10314 %{
10315 predicate(UseAPX && UseIncDec);
10316 match(Set dst (AddL src val));
10317 effect(KILL cr);
10318 flag(PD::Flag_ndd_demotable_opr1);
10319
10320 format %{ "eincq $dst, $src\t# long ndd" %}
10321 ins_encode %{
10322 __ eincq($dst$$Register, $src$$Register, false);
10323 %}
10324 ins_pipe(ialu_reg);
10325 %}
10326
10327 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10328 %{
10329 predicate(UseAPX && UseIncDec);
10330 match(Set dst (AddL (LoadL src) val));
10331 effect(KILL cr);
10332
10333 format %{ "eincq $dst, $src\t# long ndd" %}
10334 ins_encode %{
10335 __ eincq($dst$$Register, $src$$Address, false);
10336 %}
10337 ins_pipe(ialu_reg);
10338 %}
10339
10340 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10341 %{
10342 predicate(UseIncDec);
10343 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10344 effect(KILL cr);
10345
10346 ins_cost(125); // XXX
10347 format %{ "incq $dst\t# long" %}
10348 ins_encode %{
10349 __ incrementq($dst$$Address);
10350 %}
10351 ins_pipe(ialu_mem_imm);
10352 %}
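
// incq/decq encode one byte shorter than addq/subq with an immediate and
// update all arithmetic flags except CF, which is presumably why the
// inc/dec forms here advertise no flag() effects. UseIncDec gates them
// because the partial flags update can stall some pipelines.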
10353
10354 // Decrement matches AddL with -1: the ideal graph canonicalizes x-c into x+(-c).
10355 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10356 %{
10357 predicate(!UseAPX && UseIncDec);
10358 match(Set dst (AddL dst src));
10359 effect(KILL cr);
10360
10361 format %{ "decq $dst\t# long" %}
10362 ins_encode %{
10363 __ decrementq($dst$$Register);
10364 %}
10365 ins_pipe(ialu_reg);
10366 %}
10367
10368 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10369 %{
10370 predicate(UseAPX && UseIncDec);
10371 match(Set dst (AddL src val));
10372 effect(KILL cr);
10373 flag(PD::Flag_ndd_demotable_opr1);
10374
10375 format %{ "edecq $dst, $src\t# long ndd" %}
10376 ins_encode %{
10377 __ edecq($dst$$Register, $src$$Register, false);
10378 %}
10379 ins_pipe(ialu_reg);
10380 %}
10381
10382 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10383 %{
10384 predicate(UseAPX && UseIncDec);
10385 match(Set dst (AddL (LoadL src) val));
10386 effect(KILL cr);
10387
10388 format %{ "edecq $dst, $src\t# long ndd" %}
10389 ins_encode %{
10390 __ edecq($dst$$Register, $src$$Address, false);
10391 %}
10392 ins_pipe(ialu_reg);
10393 %}
10394
10395 // Decrement matches AddL with -1: the ideal graph canonicalizes x-c into x+(-c).
10396 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10397 %{
10398 predicate(UseIncDec);
10399 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10400 effect(KILL cr);
10401
10402 ins_cost(125); // XXX
10403 format %{ "decq $dst\t# long" %}
10404 ins_encode %{
10405 __ decrementq($dst$$Address);
10406 %}
10407 ins_pipe(ialu_mem_imm);
10408 %}
10409
10410 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10411 %{
10412 predicate(VM_Version::supports_fast_2op_lea());
10413 match(Set dst (AddL (LShiftL index scale) disp));
10414
10415 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10416 ins_encode %{
10417 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10418 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10419 %}
10420 ins_pipe(ialu_reg_reg);
10421 %}
10422
10423 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10424 %{
10425 predicate(VM_Version::supports_fast_3op_lea());
10426 match(Set dst (AddL (AddL base index) disp));
10427
10428 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10429 ins_encode %{
10430 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10431 %}
10432 ins_pipe(ialu_reg_reg);
10433 %}
10434
10435 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10436 %{
10437 predicate(VM_Version::supports_fast_2op_lea());
10438 match(Set dst (AddL base (LShiftL index scale)));
10439
10440 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10441 ins_encode %{
10442 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10443 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10444 %}
10445 ins_pipe(ialu_reg_reg);
10446 %}
10447
10448 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10449 %{
10450 predicate(VM_Version::supports_fast_3op_lea());
10451 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10452
10453 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10454 ins_encode %{
10455 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10456 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10457 %}
10458 ins_pipe(ialu_reg_reg);
10459 %}
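
// The leaq patterns above strength-reduce add and shift-add combinations
// into a single address computation. lea neither reads nor writes flags,
// so none of them need KILL cr; they are gated on supports_fast_*op_lea()
// because two- and three-component lea has extra latency on some cores.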
10460
10461 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10462 %{
10463 match(Set dst (AddP dst src));
10464 effect(KILL cr);
10465 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10466
10467 format %{ "addq $dst, $src\t# ptr" %}
10468 ins_encode %{
10469 __ addq($dst$$Register, $src$$Register);
10470 %}
10471 ins_pipe(ialu_reg_reg);
10472 %}
10473
10474 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10475 %{
10476 match(Set dst (AddP dst src));
10477 effect(KILL cr);
10478 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10479
10480 format %{ "addq $dst, $src\t# ptr" %}
10481 ins_encode %{
10482 __ addq($dst$$Register, $src$$constant);
10483 %}
10484 ins_pipe( ialu_reg );
10485 %}
10486
10487 // XXX addP mem ops ????
10488
10489 instruct checkCastPP(rRegP dst)
10490 %{
10491 match(Set dst (CheckCastPP dst));
10492
10493 size(0);
10494 format %{ "# checkcastPP of $dst" %}
10495 ins_encode(/* empty encoding */);
10496 ins_pipe(empty);
10497 %}
10498
10499 instruct castPP(rRegP dst)
10500 %{
10501 match(Set dst (CastPP dst));
10502
10503 size(0);
10504 format %{ "# castPP of $dst" %}
10505 ins_encode(/* empty encoding */);
10506 ins_pipe(empty);
10507 %}
10508
10509 instruct castII(rRegI dst)
10510 %{
10511 predicate(VerifyConstraintCasts == 0);
10512 match(Set dst (CastII dst));
10513
10514 size(0);
10515 format %{ "# castII of $dst" %}
10516 ins_encode(/* empty encoding */);
10517 ins_cost(0);
10518 ins_pipe(empty);
10519 %}
10520
10521 instruct castII_checked(rRegI dst, rFlagsReg cr)
10522 %{
10523 predicate(VerifyConstraintCasts > 0);
10524 match(Set dst (CastII dst));
10525
10526 effect(KILL cr);
10527 format %{ "# cast_checked_II $dst" %}
10528 ins_encode %{
10529 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10530 %}
10531 ins_pipe(pipe_slow);
10532 %}
10533
10534 instruct castLL(rRegL dst)
10535 %{
10536 predicate(VerifyConstraintCasts == 0);
10537 match(Set dst (CastLL dst));
10538
10539 size(0);
10540 format %{ "# castLL of $dst" %}
10541 ins_encode(/* empty encoding */);
10542 ins_cost(0);
10543 ins_pipe(empty);
10544 %}
10545
10546 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10547 %{
10548 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10549 match(Set dst (CastLL dst));
10550
10551 effect(KILL cr);
10552 format %{ "# cast_checked_LL $dst" %}
10553 ins_encode %{
10554 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10555 %}
10556 ins_pipe(pipe_slow);
10557 %}
10558
10559 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10560 %{
10561 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10562 match(Set dst (CastLL dst));
10563
10564 effect(KILL cr, TEMP tmp);
10565 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10566 ins_encode %{
10567 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10568 %}
10569 ins_pipe(pipe_slow);
10570 %}
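
// With VerifyConstraintCasts == 0 (the default) the constraint casts are
// zero-size placeholders that only carry type information for the
// optimizer. When enabled, they expand into runtime range checks; the
// _L32 variant avoids the TEMP register when the bounds fit in 32-bit
// immediates (castLL_is_imm32).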
10571
10572 instruct castFF(regF dst)
10573 %{
10574 match(Set dst (CastFF dst));
10575
10576 size(0);
10577 format %{ "# castFF of $dst" %}
10578 ins_encode(/* empty encoding */);
10579 ins_cost(0);
10580 ins_pipe(empty);
10581 %}
10582
10583 instruct castHH(regF dst)
10584 %{
10585 match(Set dst (CastHH dst));
10586
10587 size(0);
10588 format %{ "# castHH of $dst" %}
10589 ins_encode(/* empty encoding */);
10590 ins_cost(0);
10591 ins_pipe(empty);
10592 %}
10593
10594 instruct castDD(regD dst)
10595 %{
10596 match(Set dst (CastDD dst));
10597
10598 size(0);
10599 format %{ "# castDD of $dst" %}
10600 ins_encode(/* empty encoding */);
10601 ins_cost(0);
10602 ins_pipe(empty);
10603 %}
10604
10605 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10606 instruct compareAndSwapP(rRegI res,
10607 memory mem_ptr,
10608 rax_RegP oldval, rRegP newval,
10609 rFlagsReg cr)
10610 %{
10611 predicate(n->as_LoadStore()->barrier_data() == 0);
10612 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10613 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10614 effect(KILL cr, KILL oldval);
10615
10616 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10617 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10618 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10619 ins_encode %{
10620 __ lock();
10621 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10622 __ setcc(Assembler::equal, $res$$Register);
10623 %}
10624 ins_pipe( pipe_cmpxchg );
10625 %}
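
// All CompareAndSwap* patterns share one shape: lock cmpxchg implicitly
// compares rax (oldval) with the memory operand, stores newval on a match,
// and reports the outcome in ZF, which setcc materializes as the 0/1
// result:
//
//   lock cmpxchgq $newval, $mem_ptr    # ZF = (rax == [$mem_ptr])
//   sete $res; movzbl $res, $res       # or setzue $res under APX
//
// oldval is KILLed because cmpxchg writes the observed memory value back
// into rax on failure.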
10626
10627 instruct compareAndSwapL(rRegI res,
10628 memory mem_ptr,
10629 rax_RegL oldval, rRegL newval,
10630 rFlagsReg cr)
10631 %{
10632 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10633 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10634 effect(KILL cr, KILL oldval);
10635
10636 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10637 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10638 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10639 ins_encode %{
10640 __ lock();
10641 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10642 __ setcc(Assembler::equal, $res$$Register);
10643 %}
10644 ins_pipe( pipe_cmpxchg );
10645 %}
10646
10647 instruct compareAndSwapI(rRegI res,
10648 memory mem_ptr,
10649 rax_RegI oldval, rRegI newval,
10650 rFlagsReg cr)
10651 %{
10652 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10653 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10654 effect(KILL cr, KILL oldval);
10655
10656 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10657 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10658 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10659 ins_encode %{
10660 __ lock();
10661 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10662 __ setcc(Assembler::equal, $res$$Register);
10663 %}
10664 ins_pipe( pipe_cmpxchg );
10665 %}
10666
10667 instruct compareAndSwapB(rRegI res,
10668 memory mem_ptr,
10669 rax_RegI oldval, rRegI newval,
10670 rFlagsReg cr)
10671 %{
10672 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10673 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10674 effect(KILL cr, KILL oldval);
10675
10676 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10677 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679 ins_encode %{
10680 __ lock();
10681 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10682 __ setcc(Assembler::equal, $res$$Register);
10683 %}
10684 ins_pipe( pipe_cmpxchg );
10685 %}
10686
10687 instruct compareAndSwapS(rRegI res,
10688 memory mem_ptr,
10689 rax_RegI oldval, rRegI newval,
10690 rFlagsReg cr)
10691 %{
10692 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10693 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10694 effect(KILL cr, KILL oldval);
10695
10696 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10697 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699 ins_encode %{
10700 __ lock();
10701 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10702 __ setcc(Assembler::equal, $res$$Register);
10703 %}
10704 ins_pipe( pipe_cmpxchg );
10705 %}
10706
10707 instruct compareAndSwapN(rRegI res,
10708 memory mem_ptr,
10709 rax_RegN oldval, rRegN newval,
10710 rFlagsReg cr) %{
10711 predicate(n->as_LoadStore()->barrier_data() == 0);
10712 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10713 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10714 effect(KILL cr, KILL oldval);
10715
10716 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10717 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719 ins_encode %{
10720 __ lock();
10721 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10722 __ setcc(Assembler::equal, $res$$Register);
10723 %}
10724 ins_pipe( pipe_cmpxchg );
10725 %}
10726
10727 instruct compareAndExchangeB(
10728 memory mem_ptr,
10729 rax_RegI oldval, rRegI newval,
10730 rFlagsReg cr)
10731 %{
10732 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10733 effect(KILL cr);
10734
10735 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10736 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10737 ins_encode %{
10738 __ lock();
10739 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10740 %}
10741 ins_pipe( pipe_cmpxchg );
10742 %}
10743
10744 instruct compareAndExchangeS(
10745 memory mem_ptr,
10746 rax_RegI oldval, rRegI newval,
10747 rFlagsReg cr)
10748 %{
10749 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10750 effect(KILL cr);
10751
10752 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10753 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10754 ins_encode %{
10755 __ lock();
10756 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10757 %}
10758 ins_pipe( pipe_cmpxchg );
10759 %}
10760
10761 instruct compareAndExchangeI(
10762 memory mem_ptr,
10763 rax_RegI oldval, rRegI newval,
10764 rFlagsReg cr)
10765 %{
10766 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10767 effect(KILL cr);
10768
10769 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10770 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10771 ins_encode %{
10772 __ lock();
10773 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10774 %}
10775 ins_pipe( pipe_cmpxchg );
10776 %}
10777
10778 instruct compareAndExchangeL(
10779 memory mem_ptr,
10780 rax_RegL oldval, rRegL newval,
10781 rFlagsReg cr)
10782 %{
10783 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10784 effect(KILL cr);
10785
10786 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10787 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10788 ins_encode %{
10789 __ lock();
10790 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10791 %}
10792 ins_pipe( pipe_cmpxchg );
10793 %}
10794
10795 instruct compareAndExchangeN(
10796 memory mem_ptr,
10797 rax_RegN oldval, rRegN newval,
10798 rFlagsReg cr) %{
10799 predicate(n->as_LoadStore()->barrier_data() == 0);
10800 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10801 effect(KILL cr);
10802
10803 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10804 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10805 ins_encode %{
10806 __ lock();
10807 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10808 %}
10809 ins_pipe( pipe_cmpxchg );
10810 %}
10811
10812 instruct compareAndExchangeP(
10813 memory mem_ptr,
10814 rax_RegP oldval, rRegP newval,
10815 rFlagsReg cr)
10816 %{
10817 predicate(n->as_LoadStore()->barrier_data() == 0);
10818 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10819 effect(KILL cr);
10820
10821 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10822 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10823 ins_encode %{
10824 __ lock();
10825 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10826 %}
10827 ins_pipe( pipe_cmpxchg );
10828 %}
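
// CompareAndExchange* differs from CompareAndSwap* only in its result:
// the value witnessed in memory rather than a success flag. cmpxchg
// already leaves that value in rax, so oldval doubles as the result and
// no setcc is needed.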
10829
10830 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10831 predicate(n->as_LoadStore()->result_not_used());
10832 match(Set dummy (GetAndAddB mem add));
10833 effect(KILL cr);
10834 format %{ "addb_lock $mem, $add" %}
10835 ins_encode %{
10836 __ lock();
10837 __ addb($mem$$Address, $add$$Register);
10838 %}
10839 ins_pipe(pipe_cmpxchg);
10840 %}
10841
10842 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10843 predicate(n->as_LoadStore()->result_not_used());
10844 match(Set dummy (GetAndAddB mem add));
10845 effect(KILL cr);
10846 format %{ "addb_lock $mem, $add" %}
10847 ins_encode %{
10848 __ lock();
10849 __ addb($mem$$Address, $add$$constant);
10850 %}
10851 ins_pipe(pipe_cmpxchg);
10852 %}
10853
10854 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10855 predicate(!n->as_LoadStore()->result_not_used());
10856 match(Set newval (GetAndAddB mem newval));
10857 effect(KILL cr);
10858 format %{ "xaddb_lock $mem, $newval" %}
10859 ins_encode %{
10860 __ lock();
10861 __ xaddb($mem$$Address, $newval$$Register);
10862 %}
10863 ins_pipe(pipe_cmpxchg);
10864 %}
10865
10866 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10867 predicate(n->as_LoadStore()->result_not_used());
10868 match(Set dummy (GetAndAddS mem add));
10869 effect(KILL cr);
10870 format %{ "addw_lock $mem, $add" %}
10871 ins_encode %{
10872 __ lock();
10873 __ addw($mem$$Address, $add$$Register);
10874 %}
10875 ins_pipe(pipe_cmpxchg);
10876 %}
10877
10878 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10879 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10880 match(Set dummy (GetAndAddS mem add));
10881 effect(KILL cr);
10882 format %{ "addw_lock $mem, $add" %}
10883 ins_encode %{
10884 __ lock();
10885 __ addw($mem$$Address, $add$$constant);
10886 %}
10887 ins_pipe(pipe_cmpxchg);
10888 %}
10889
10890 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10891 predicate(!n->as_LoadStore()->result_not_used());
10892 match(Set newval (GetAndAddS mem newval));
10893 effect(KILL cr);
10894 format %{ "xaddw_lock $mem, $newval" %}
10895 ins_encode %{
10896 __ lock();
10897 __ xaddw($mem$$Address, $newval$$Register);
10898 %}
10899 ins_pipe(pipe_cmpxchg);
10900 %}
10901
10902 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10903 predicate(n->as_LoadStore()->result_not_used());
10904 match(Set dummy (GetAndAddI mem add));
10905 effect(KILL cr);
10906 format %{ "addl_lock $mem, $add" %}
10907 ins_encode %{
10908 __ lock();
10909 __ addl($mem$$Address, $add$$Register);
10910 %}
10911 ins_pipe(pipe_cmpxchg);
10912 %}
10913
10914 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10915 predicate(n->as_LoadStore()->result_not_used());
10916 match(Set dummy (GetAndAddI mem add));
10917 effect(KILL cr);
10918 format %{ "addl_lock $mem, $add" %}
10919 ins_encode %{
10920 __ lock();
10921 __ addl($mem$$Address, $add$$constant);
10922 %}
10923 ins_pipe(pipe_cmpxchg);
10924 %}
10925
10926 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10927 predicate(!n->as_LoadStore()->result_not_used());
10928 match(Set newval (GetAndAddI mem newval));
10929 effect(KILL cr);
10930 format %{ "xaddl_lock $mem, $newval" %}
10931 ins_encode %{
10932 __ lock();
10933 __ xaddl($mem$$Address, $newval$$Register);
10934 %}
10935 ins_pipe(pipe_cmpxchg);
10936 %}
10937
10938 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10939 predicate(n->as_LoadStore()->result_not_used());
10940 match(Set dummy (GetAndAddL mem add));
10941 effect(KILL cr);
10942 format %{ "addq_lock $mem, $add" %}
10943 ins_encode %{
10944 __ lock();
10945 __ addq($mem$$Address, $add$$Register);
10946 %}
10947 ins_pipe(pipe_cmpxchg);
10948 %}
10949
10950 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10951 predicate(n->as_LoadStore()->result_not_used());
10952 match(Set dummy (GetAndAddL mem add));
10953 effect(KILL cr);
10954 format %{ "addq_lock $mem, $add" %}
10955 ins_encode %{
10956 __ lock();
10957 __ addq($mem$$Address, $add$$constant);
10958 %}
10959 ins_pipe(pipe_cmpxchg);
10960 %}
10961
10962 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10963 predicate(!n->as_LoadStore()->result_not_used());
10964 match(Set newval (GetAndAddL mem newval));
10965 effect(KILL cr);
10966 format %{ "xaddq_lock $mem, $newval" %}
10967 ins_encode %{
10968 __ lock();
10969 __ xaddq($mem$$Address, $newval$$Register);
10970 %}
10971 ins_pipe(pipe_cmpxchg);
10972 %}
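
// GetAndAdd* splits on whether the old value is consumed: when
// result_not_used(), a plain "lock add" to memory suffices; otherwise
// "lock xadd" atomically adds and returns the previous memory value in
// the register operand.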
10973
10974 instruct xchgB( memory mem, rRegI newval) %{
10975 match(Set newval (GetAndSetB mem newval));
10976 format %{ "XCHGB $newval,[$mem]" %}
10977 ins_encode %{
10978 __ xchgb($newval$$Register, $mem$$Address);
10979 %}
10980 ins_pipe( pipe_cmpxchg );
10981 %}
10982
10983 instruct xchgS( memory mem, rRegI newval) %{
10984 match(Set newval (GetAndSetS mem newval));
10985 format %{ "XCHGW $newval,[$mem]" %}
10986 ins_encode %{
10987 __ xchgw($newval$$Register, $mem$$Address);
10988 %}
10989 ins_pipe( pipe_cmpxchg );
10990 %}
10991
10992 instruct xchgI( memory mem, rRegI newval) %{
10993 match(Set newval (GetAndSetI mem newval));
10994 format %{ "XCHGL $newval,[$mem]" %}
10995 ins_encode %{
10996 __ xchgl($newval$$Register, $mem$$Address);
10997 %}
10998 ins_pipe( pipe_cmpxchg );
10999 %}
11000
11001 instruct xchgL( memory mem, rRegL newval) %{
11002 match(Set newval (GetAndSetL mem newval));
11003 format %{ "XCHGL $newval,[$mem]" %}
11004 ins_encode %{
11005 __ xchgq($newval$$Register, $mem$$Address);
11006 %}
11007 ins_pipe( pipe_cmpxchg );
11008 %}
11009
11010 instruct xchgP( memory mem, rRegP newval) %{
11011 match(Set newval (GetAndSetP mem newval));
11012 predicate(n->as_LoadStore()->barrier_data() == 0);
11013 format %{ "XCHGQ $newval,[$mem]" %}
11014 ins_encode %{
11015 __ xchgq($newval$$Register, $mem$$Address);
11016 %}
11017 ins_pipe( pipe_cmpxchg );
11018 %}
11019
11020 instruct xchgN( memory mem, rRegN newval) %{
11021 predicate(n->as_LoadStore()->barrier_data() == 0);
11022 match(Set newval (GetAndSetN mem newval));
11023 format %{ "XCHGL $newval,$mem]" %}
11024 ins_encode %{
11025 __ xchgl($newval$$Register, $mem$$Address);
11026 %}
11027 ins_pipe( pipe_cmpxchg );
11028 %}
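
// GetAndSet* maps directly to xchg. With a memory operand xchg is
// implicitly locked, so no lock() prefix is emitted, and it leaves the
// flags untouched, which is why these patterns have no KILL cr effect.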
11029
11030 //----------Abs Instructions-------------------------------------------
11031
11032 // Integer Absolute Instructions
11033 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11034 %{
11035 match(Set dst (AbsI src));
11036 effect(TEMP dst, KILL cr);
11037 format %{ "xorl $dst, $dst\t# abs int\n\t"
11038 "subl $dst, $src\n\t"
11039 "cmovll $dst, $src" %}
11040 ins_encode %{
11041 __ xorl($dst$$Register, $dst$$Register);
11042 __ subl($dst$$Register, $src$$Register);
11043 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11044 %}
11045
11046 ins_pipe(ialu_reg_reg);
11047 %}
11048
11049 // Long Absolute Instructions
11050 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11051 %{
11052 match(Set dst (AbsL src));
11053 effect(TEMP dst, KILL cr);
11054 format %{ "xorl $dst, $dst\t# abs long\n\t"
11055 "subq $dst, $src\n\t"
11056 "cmovlq $dst, $src" %}
11057 ins_encode %{
11058 __ xorl($dst$$Register, $dst$$Register);
11059 __ subq($dst$$Register, $src$$Register);
11060 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11061 %}
11062
11063 ins_pipe(ialu_reg_reg);
11064 %}
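
// Both abs idioms are branchless: compute -src, then use the flags of
// that subtraction to conditionally take src itself.
//
//   xor     dst, dst     # dst = 0
//   sub     dst, src     # dst = -src, sets SF/OF
//   cmov(l) dst, src     # if -src < 0, i.e. src was positive, dst = src
//
// For the most negative value the cmov does not fire (OF is set), so the
// result is the value itself, matching Java's Math.abs semantics.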
11065
11066 //----------Subtraction Instructions-------------------------------------------
11067
11068 // Integer Subtraction Instructions
11069 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11070 %{
11071 predicate(!UseAPX);
11072 match(Set dst (SubI dst src));
11073 effect(KILL cr);
11074 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11075
11076 format %{ "subl $dst, $src\t# int" %}
11077 ins_encode %{
11078 __ subl($dst$$Register, $src$$Register);
11079 %}
11080 ins_pipe(ialu_reg_reg);
11081 %}
11082
11083 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11084 %{
11085 predicate(UseAPX);
11086 match(Set dst (SubI src1 src2));
11087 effect(KILL cr);
11088 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11089
11090 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11091 ins_encode %{
11092 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11093 %}
11094 ins_pipe(ialu_reg_reg);
11095 %}
11096
11097 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11098 %{
11099 predicate(UseAPX);
11100 match(Set dst (SubI src1 src2));
11101 effect(KILL cr);
11102 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11103
11104 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11105 ins_encode %{
11106 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11107 %}
11108 ins_pipe(ialu_reg_reg);
11109 %}
11110
11111 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11112 %{
11113 predicate(UseAPX);
11114 match(Set dst (SubI (LoadI src1) src2));
11115 effect(KILL cr);
11116 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11117
11118 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11119 ins_encode %{
11120 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11121 %}
11122 ins_pipe(ialu_reg_reg);
11123 %}
11124
11125 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11126 %{
11127 predicate(!UseAPX);
11128 match(Set dst (SubI dst (LoadI src)));
11129 effect(KILL cr);
11130 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11131
11132 ins_cost(150);
11133 format %{ "subl $dst, $src\t# int" %}
11134 ins_encode %{
11135 __ subl($dst$$Register, $src$$Address);
11136 %}
11137 ins_pipe(ialu_reg_mem);
11138 %}
11139
11140 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11141 %{
11142 predicate(UseAPX);
11143 match(Set dst (SubI src1 (LoadI src2)));
11144 effect(KILL cr);
11145 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11146
11147 ins_cost(150);
11148 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11149 ins_encode %{
11150 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11151 %}
11152 ins_pipe(ialu_reg_mem);
11153 %}
11154
11155 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11156 %{
11157 predicate(UseAPX);
11158 match(Set dst (SubI (LoadI src1) src2));
11159 effect(KILL cr);
11160 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11161
11162 ins_cost(150);
11163 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11164 ins_encode %{
11165 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11166 %}
11167 ins_pipe(ialu_reg_mem);
11168 %}
11169
11170 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11171 %{
11172 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11173 effect(KILL cr);
11174 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175
11176 ins_cost(150);
11177 format %{ "subl $dst, $src\t# int" %}
11178 ins_encode %{
11179 __ subl($dst$$Address, $src$$Register);
11180 %}
11181 ins_pipe(ialu_mem_reg);
11182 %}
11183
11184 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11185 %{
11186 predicate(!UseAPX);
11187 match(Set dst (SubL dst src));
11188 effect(KILL cr);
11189 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11190
11191 format %{ "subq $dst, $src\t# long" %}
11192 ins_encode %{
11193 __ subq($dst$$Register, $src$$Register);
11194 %}
11195 ins_pipe(ialu_reg_reg);
11196 %}
11197
11198 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11199 %{
11200 predicate(UseAPX);
11201 match(Set dst (SubL src1 src2));
11202 effect(KILL cr);
11203 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11204
11205 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11206 ins_encode %{
11207 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11208 %}
11209 ins_pipe(ialu_reg_reg);
11210 %}
11211
11212 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11213 %{
11214 predicate(UseAPX);
11215 match(Set dst (SubL src1 src2));
11216 effect(KILL cr);
11217 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11218
11219 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11220 ins_encode %{
11221 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11222 %}
11223 ins_pipe(ialu_reg_reg);
11224 %}
11225
11226 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11227 %{
11228 predicate(UseAPX);
11229 match(Set dst (SubL (LoadL src1) src2));
11230 effect(KILL cr);
11231 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11232
11233 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11234 ins_encode %{
11235 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11236 %}
11237 ins_pipe(ialu_reg_reg);
11238 %}
11239
11240 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11241 %{
11242 predicate(!UseAPX);
11243 match(Set dst (SubL dst (LoadL src)));
11244 effect(KILL cr);
11245 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11246
11247 ins_cost(150);
11248 format %{ "subq $dst, $src\t# long" %}
11249 ins_encode %{
11250 __ subq($dst$$Register, $src$$Address);
11251 %}
11252 ins_pipe(ialu_reg_mem);
11253 %}
11254
11255 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11256 %{
11257 predicate(UseAPX);
11258 match(Set dst (SubL src1 (LoadL src2)));
11259 effect(KILL cr);
11260 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11261
11262 ins_cost(150);
11263 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11264 ins_encode %{
11265 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11266 %}
11267 ins_pipe(ialu_reg_mem);
11268 %}
11269
11270 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11271 %{
11272 predicate(UseAPX);
11273 match(Set dst (SubL (LoadL src1) src2));
11274 effect(KILL cr);
11275 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11276
11277 ins_cost(150);
11278 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11279 ins_encode %{
11280 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11281 %}
11282 ins_pipe(ialu_reg_mem);
11283 %}
11284
11285 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11286 %{
11287 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11288 effect(KILL cr);
11289 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290
11291 ins_cost(150);
11292 format %{ "subq $dst, $src\t# long" %}
11293 ins_encode %{
11294 __ subq($dst$$Address, $src$$Register);
11295 %}
11296 ins_pipe(ialu_mem_reg);
11297 %}
11298
11299 // Subtract an int from a pointer. There is no SubP ideal node; p - i is
11300 // expressed as AddP(p, SubI(0, i)), hence the shape of the match below.
11301 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11302 %{
11303 match(Set dst (AddP dst (SubI zero src)));
11304 effect(KILL cr);
11305
11306 format %{ "subq $dst, $src\t# ptr - int" %}
11307 ins_encode %{
11308 __ subq($dst$$Register, $src$$Register);
11309 %}
11310 ins_pipe(ialu_reg_reg);
11311 %}
11312
11313 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11314 %{
11315 predicate(!UseAPX);
11316 match(Set dst (SubI zero dst));
11317 effect(KILL cr);
11318 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11319
11320 format %{ "negl $dst\t# int" %}
11321 ins_encode %{
11322 __ negl($dst$$Register);
11323 %}
11324 ins_pipe(ialu_reg);
11325 %}
11326
11327 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11328 %{
11329 predicate(UseAPX);
11330 match(Set dst (SubI zero src));
11331 effect(KILL cr);
11332 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11333
11334 format %{ "enegl $dst, $src\t# int ndd" %}
11335 ins_encode %{
11336 __ enegl($dst$$Register, $src$$Register, false);
11337 %}
11338 ins_pipe(ialu_reg);
11339 %}
11340
11341 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11342 %{
11343 predicate(!UseAPX);
11344 match(Set dst (NegI dst));
11345 effect(KILL cr);
11346 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11347
11348 format %{ "negl $dst\t# int" %}
11349 ins_encode %{
11350 __ negl($dst$$Register);
11351 %}
11352 ins_pipe(ialu_reg);
11353 %}
11354
11355 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11356 %{
11357 predicate(UseAPX);
11358 match(Set dst (NegI src));
11359 effect(KILL cr);
11360 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11361
11362 format %{ "enegl $dst, $src\t# int ndd" %}
11363 ins_encode %{
11364 __ enegl($dst$$Register, $src$$Register, false);
11365 %}
11366 ins_pipe(ialu_reg);
11367 %}
11368
11369 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11370 %{
11371 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11372 effect(KILL cr);
11373 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11374
11375 format %{ "negl $dst\t# int" %}
11376 ins_encode %{
11377 __ negl($dst$$Address);
11378 %}
11379 ins_pipe(ialu_reg);
11380 %}
11381
11382 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11383 %{
11384 predicate(!UseAPX);
11385 match(Set dst (SubL zero dst));
11386 effect(KILL cr);
11387 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11388
11389 format %{ "negq $dst\t# long" %}
11390 ins_encode %{
11391 __ negq($dst$$Register);
11392 %}
11393 ins_pipe(ialu_reg);
11394 %}
11395
11396 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11397 %{
11398 predicate(UseAPX);
11399 match(Set dst (SubL zero src));
11400 effect(KILL cr);
11401 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11402
11403 format %{ "enegq $dst, $src\t# long ndd" %}
11404 ins_encode %{
11405 __ enegq($dst$$Register, $src$$Register, false);
11406 %}
11407 ins_pipe(ialu_reg);
11408 %}
11409
11410 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11411 %{
11412 predicate(!UseAPX);
11413 match(Set dst (NegL dst));
11414 effect(KILL cr);
11415 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11416
11417 format %{ "negq $dst\t# int" %}
11418 ins_encode %{
11419 __ negq($dst$$Register);
11420 %}
11421 ins_pipe(ialu_reg);
11422 %}
11423
11424 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11425 %{
11426 predicate(UseAPX);
11427 match(Set dst (NegL src));
11428 effect(KILL cr);
11429 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11430
11431 format %{ "enegq $dst, $src\t# long ndd" %}
11432 ins_encode %{
11433 __ enegq($dst$$Register, $src$$Register, false);
11434 %}
11435 ins_pipe(ialu_reg);
11436 %}
11437
11438 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11439 %{
11440 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11441 effect(KILL cr);
11442 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11443
11444 format %{ "negq $dst\t# long" %}
11445 ins_encode %{
11446 __ negq($dst$$Address);
11447 %}
11448 ins_pipe(ialu_reg);
11449 %}
11450
11451 //----------Multiplication/Division Instructions-------------------------------
11452 // Integer Multiplication Instructions
11453 // Multiply Register
11454
11455 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11456 %{
11457 predicate(!UseAPX);
11458 match(Set dst (MulI dst src));
11459 effect(KILL cr);
11460
11461 ins_cost(300);
11462 format %{ "imull $dst, $src\t# int" %}
11463 ins_encode %{
11464 __ imull($dst$$Register, $src$$Register);
11465 %}
11466 ins_pipe(ialu_reg_reg_alu0);
11467 %}
11468
11469 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11470 %{
11471 predicate(UseAPX);
11472 match(Set dst (MulI src1 src2));
11473 effect(KILL cr);
11474 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11475
11476 ins_cost(300);
11477 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11478 ins_encode %{
11479 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11480 %}
11481 ins_pipe(ialu_reg_reg_alu0);
11482 %}
11483
11484 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11485 %{
11486 match(Set dst (MulI src imm));
11487 effect(KILL cr);
11488
11489 ins_cost(300);
11490 format %{ "imull $dst, $src, $imm\t# int" %}
11491 ins_encode %{
11492 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11493 %}
11494 ins_pipe(ialu_reg_reg_alu0);
11495 %}
11496
11497 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11498 %{
11499 predicate(!UseAPX);
11500 match(Set dst (MulI dst (LoadI src)));
11501 effect(KILL cr);
11502
11503 ins_cost(350);
11504 format %{ "imull $dst, $src\t# int" %}
11505 ins_encode %{
11506 __ imull($dst$$Register, $src$$Address);
11507 %}
11508 ins_pipe(ialu_reg_mem_alu0);
11509 %}
11510
11511 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11512 %{
11513 predicate(UseAPX);
11514 match(Set dst (MulI src1 (LoadI src2)));
11515 effect(KILL cr);
11516 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11517
11518 ins_cost(350);
11519 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11520 ins_encode %{
11521 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11522 %}
11523 ins_pipe(ialu_reg_mem_alu0);
11524 %}
11525
11526 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11527 %{
11528 match(Set dst (MulI (LoadI src) imm));
11529 effect(KILL cr);
11530
11531 ins_cost(300);
11532 format %{ "imull $dst, $src, $imm\t# int" %}
11533 ins_encode %{
11534 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11535 %}
11536 ins_pipe(ialu_reg_mem_alu0);
11537 %}
11538
11539 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11540 %{
11541 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11542 effect(KILL cr, KILL src2);
11543
11544 expand %{ mulI_rReg(dst, src1, cr);
11545 mulI_rReg(src2, src3, cr);
11546 addI_rReg(dst, src2, cr); %}
11547 %}
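
// MulAddS2I (dst * src1 + src2 * src3) has no single general-register
// instruction on x86, so it is expanded into the mulI_rReg pattern twice
// plus the addI_rReg pattern defined earlier; src2 is killed because it
// ends up holding the second product.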
11548
11549 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11550 %{
11551 predicate(!UseAPX);
11552 match(Set dst (MulL dst src));
11553 effect(KILL cr);
11554
11555 ins_cost(300);
11556 format %{ "imulq $dst, $src\t# long" %}
11557 ins_encode %{
11558 __ imulq($dst$$Register, $src$$Register);
11559 %}
11560 ins_pipe(ialu_reg_reg_alu0);
11561 %}
11562
11563 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11564 %{
11565 predicate(UseAPX);
11566 match(Set dst (MulL src1 src2));
11567 effect(KILL cr);
11568 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11569
11570 ins_cost(300);
11571 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11572 ins_encode %{
11573 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11574 %}
11575 ins_pipe(ialu_reg_reg_alu0);
11576 %}
11577
11578 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11579 %{
11580 match(Set dst (MulL src imm));
11581 effect(KILL cr);
11582
11583 ins_cost(300);
11584 format %{ "imulq $dst, $src, $imm\t# long" %}
11585 ins_encode %{
11586 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11587 %}
11588 ins_pipe(ialu_reg_reg_alu0);
11589 %}
11590
11591 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11592 %{
11593 predicate(!UseAPX);
11594 match(Set dst (MulL dst (LoadL src)));
11595 effect(KILL cr);
11596
11597 ins_cost(350);
11598 format %{ "imulq $dst, $src\t# long" %}
11599 ins_encode %{
11600 __ imulq($dst$$Register, $src$$Address);
11601 %}
11602 ins_pipe(ialu_reg_mem_alu0);
11603 %}
11604
11605 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11606 %{
11607 predicate(UseAPX);
11608 match(Set dst (MulL src1 (LoadL src2)));
11609 effect(KILL cr);
11610 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11611
11612 ins_cost(350);
11613 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11614 ins_encode %{
11615 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11616 %}
11617 ins_pipe(ialu_reg_mem_alu0);
11618 %}
11619
11620 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11621 %{
11622 match(Set dst (MulL (LoadL src) imm));
11623 effect(KILL cr);
11624
11625 ins_cost(300);
11626 format %{ "imulq $dst, $src, $imm\t# long" %}
11627 ins_encode %{
11628 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11629 %}
11630 ins_pipe(ialu_reg_mem_alu0);
11631 %}
11632
11633 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11634 %{
11635 match(Set dst (MulHiL src rax));
11636 effect(USE_KILL rax, KILL cr);
11637
11638 ins_cost(300);
11639 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11640 ins_encode %{
11641 __ imulq($src$$Register);
11642 %}
11643 ins_pipe(ialu_reg_reg_alu0);
11644 %}
11645
11646 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11647 %{
11648 match(Set dst (UMulHiL src rax));
11649 effect(USE_KILL rax, KILL cr);
11650
11651 ins_cost(300);
11652 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11653 ins_encode %{
11654 __ mulq($src$$Register);
11655 %}
11656 ins_pipe(ialu_reg_reg_alu0);
11657 %}
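
// MulHiL/UMulHiL want only the high 64 bits of the 128-bit product. The
// one-operand imulq/mulq forms compute rdx:rax = rax * src, so the high
// half lands in rdx (the result register) and rax is necessarily
// clobbered (USE_KILL rax).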
11658
11659 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11660 rFlagsReg cr)
11661 %{
11662 match(Set rax (DivI rax div));
11663 effect(KILL rdx, KILL cr);
11664
11665 ins_cost(30*100+10*100); // XXX
11666 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11667 "jne,s normal\n\t"
11668 "xorl rdx, rdx\n\t"
11669 "cmpl $div, -1\n\t"
11670 "je,s done\n"
11671 "normal: cdql\n\t"
11672 "idivl $div\n"
11673 "done:" %}
11674 ins_encode(cdql_enc(div));
11675 ins_pipe(ialu_reg_reg_alu0);
11676 %}
11677
11678 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11679 rFlagsReg cr)
11680 %{
11681 match(Set rax (DivL rax div));
11682 effect(KILL rdx, KILL cr);
11683
11684 ins_cost(30*100+10*100); // XXX
11685 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11686 "cmpq rax, rdx\n\t"
11687 "jne,s normal\n\t"
11688 "xorl rdx, rdx\n\t"
11689 "cmpq $div, -1\n\t"
11690 "je,s done\n"
11691 "normal: cdqq\n\t"
11692 "idivq $div\n"
11693 "done:" %}
11694 ins_encode(cdqq_enc(div));
11695 ins_pipe(ialu_reg_reg_alu0);
11696 %}
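
// The min-value tests in the signed divide sequences exist because idiv
// raises #DE on MIN_VALUE / -1, whereas Java defines that quotient as
// MIN_VALUE with remainder 0. The emitted code short-circuits exactly
// that case (zeroing rdx for the remainder) and otherwise sign-extends
// rax into rdx and divides.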
11697
11698 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11699 %{
11700 match(Set rax (UDivI rax div));
11701 effect(KILL rdx, KILL cr);
11702
11703 ins_cost(300);
11704 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11705 ins_encode %{
11706 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11707 %}
11708 ins_pipe(ialu_reg_reg_alu0);
11709 %}
11710
11711 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11712 %{
11713 match(Set rax (UDivL rax div));
11714 effect(KILL rdx, KILL cr);
11715
11716 ins_cost(300);
11717 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11718 ins_encode %{
11719 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11720 %}
11721 ins_pipe(ialu_reg_reg_alu0);
11722 %}
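
// The unsigned helpers have no overflow case to dodge: essentially they
// only have to zero rdx (the high half of the dividend) before the
// unsigned divide, hence the much lower ins_cost and the unconditional
// KILL of rdx.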
11723
11724 // Integer DIVMOD with Register, both quotient and mod results
11725 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11726 rFlagsReg cr)
11727 %{
11728 match(DivModI rax div);
11729 effect(KILL cr);
11730
11731 ins_cost(30*100+10*100); // XXX
11732 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11733 "jne,s normal\n\t"
11734 "xorl rdx, rdx\n\t"
11735 "cmpl $div, -1\n\t"
11736 "je,s done\n"
11737 "normal: cdql\n\t"
11738 "idivl $div\n"
11739 "done:" %}
11740 ins_encode(cdql_enc(div));
11741 ins_pipe(pipe_slow);
11742 %}
11743
11744 // Long DIVMOD with Register, both quotient and mod results
11745 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11746 rFlagsReg cr)
11747 %{
11748 match(DivModL rax div);
11749 effect(KILL cr);
11750
11751 ins_cost(30*100+10*100); // XXX
11752 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11753 "cmpq rax, rdx\n\t"
11754 "jne,s normal\n\t"
11755 "xorl rdx, rdx\n\t"
11756 "cmpq $div, -1\n\t"
11757 "je,s done\n"
11758 "normal: cdqq\n\t"
11759 "idivq $div\n"
11760 "done:" %}
11761 ins_encode(cdqq_enc(div));
11762 ins_pipe(pipe_slow);
11763 %}
11764
11765 // Unsigned integer DIVMOD with Register, both quotient and mod results
11766 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11767 no_rax_rdx_RegI div, rFlagsReg cr)
11768 %{
11769 match(UDivModI rax div);
11770 effect(TEMP tmp, KILL cr);
11771
11772 ins_cost(300);
11773 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11774 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11775 %}
11776 ins_encode %{
11777 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11778 %}
11779 ins_pipe(pipe_slow);
11780 %}
11781
11782 // Unsigned long DIVMOD with Register, both quotient and mod results
11783 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11784 no_rax_rdx_RegL div, rFlagsReg cr)
11785 %{
11786 match(UDivModL rax div);
11787 effect(TEMP tmp, KILL cr);
11788
11789 ins_cost(300);
11790 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11791 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11792 %}
11793 ins_encode %{
11794 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11795 %}
11796 ins_pipe(pipe_slow);
11797 %}
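
// The DivMod forms match the fused ideal node produced when both x / y
// and x % y of the same operands are live: one idiv (or one unsigned
// helper call) then defines the quotient in rax and the remainder in rdx.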
11798
11799 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11800 rFlagsReg cr)
11801 %{
11802 match(Set rdx (ModI rax div));
11803 effect(KILL rax, KILL cr);
11804
11805 ins_cost(300); // XXX
11806 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11807 "jne,s normal\n\t"
11808 "xorl rdx, rdx\n\t"
11809 "cmpl $div, -1\n\t"
11810 "je,s done\n"
11811 "normal: cdql\n\t"
11812 "idivl $div\n"
11813 "done:" %}
11814 ins_encode(cdql_enc(div));
11815 ins_pipe(ialu_reg_reg_alu0);
11816 %}
11817
11818 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11819 rFlagsReg cr)
11820 %{
11821 match(Set rdx (ModL rax div));
11822 effect(KILL rax, KILL cr);
11823
11824 ins_cost(300); // XXX
11825 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11826 "cmpq rax, rdx\n\t"
11827 "jne,s normal\n\t"
11828 "xorl rdx, rdx\n\t"
11829 "cmpq $div, -1\n\t"
11830 "je,s done\n"
11831 "normal: cdqq\n\t"
11832 "idivq $div\n"
11833 "done:" %}
11834 ins_encode(cdqq_enc(div));
11835 ins_pipe(ialu_reg_reg_alu0);
11836 %}
11837
11838 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11839 %{
11840 match(Set rdx (UModI rax div));
11841 effect(KILL rax, KILL cr);
11842
11843 ins_cost(300);
11844 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11845 ins_encode %{
11846 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11847 %}
11848 ins_pipe(ialu_reg_reg_alu0);
11849 %}
11850
11851 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11852 %{
11853 match(Set rdx (UModL rax div));
11854 effect(KILL rax, KILL cr);
11855
11856 ins_cost(300);
11857 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11858 ins_encode %{
11859 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11860 %}
11861 ins_pipe(ialu_reg_reg_alu0);
11862 %}
11863
11864 // Integer Shift Instructions
11865 // Shift Left by one, two, three
11866 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11867 %{
11868 predicate(!UseAPX);
11869 match(Set dst (LShiftI dst shift));
11870 effect(KILL cr);
11871
11872 format %{ "sall $dst, $shift" %}
11873 ins_encode %{
11874 __ sall($dst$$Register, $shift$$constant);
11875 %}
11876 ins_pipe(ialu_reg);
11877 %}
11878
11879 // Shift Left by one, two, three
11880 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11881 %{
11882 predicate(UseAPX);
11883 match(Set dst (LShiftI src shift));
11884 effect(KILL cr);
11885 flag(PD::Flag_ndd_demotable_opr1);
11886
11887 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11888 ins_encode %{
11889 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11890 %}
11891 ins_pipe(ialu_reg);
11892 %}
11893
11894 // Shift Left by 8-bit immediate
11895 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11896 %{
11897 predicate(!UseAPX);
11898 match(Set dst (LShiftI dst shift));
11899 effect(KILL cr);
11900
11901 format %{ "sall $dst, $shift" %}
11902 ins_encode %{
11903 __ sall($dst$$Register, $shift$$constant);
11904 %}
11905 ins_pipe(ialu_reg);
11906 %}
11907
11908 // Shift Left by 8-bit immediate
11909 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11910 %{
11911 predicate(UseAPX);
11912 match(Set dst (LShiftI src shift));
11913 effect(KILL cr);
11914 flag(PD::Flag_ndd_demotable_opr1);
11915
11916 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11917 ins_encode %{
11918 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11919 %}
11920 ins_pipe(ialu_reg);
11921 %}
11922
11923 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11924 %{
11925 predicate(UseAPX);
11926 match(Set dst (LShiftI (LoadI src) shift));
11927 effect(KILL cr);
11928
11929 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11930 ins_encode %{
11931 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11932 %}
11933 ins_pipe(ialu_reg);
11934 %}
11935
11936 // Shift Left by 8-bit immediate
11937 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11938 %{
11939 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11940 effect(KILL cr);
11941
11942 format %{ "sall $dst, $shift" %}
11943 ins_encode %{
11944 __ sall($dst$$Address, $shift$$constant);
11945 %}
11946 ins_pipe(ialu_mem_imm);
11947 %}
11948
11949 // Shift Left by variable
11950 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11951 %{
11952 predicate(!VM_Version::supports_bmi2());
11953 match(Set dst (LShiftI dst shift));
11954 effect(KILL cr);
11955
11956 format %{ "sall $dst, $shift" %}
11957 ins_encode %{
11958 __ sall($dst$$Register);
11959 %}
11960 ins_pipe(ialu_reg_reg);
11961 %}
11962
11963 // Shift Left by variable
11964 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11965 %{
11966 predicate(!VM_Version::supports_bmi2());
11967 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11968 effect(KILL cr);
11969
11970 format %{ "sall $dst, $shift" %}
11971 ins_encode %{
11972 __ sall($dst$$Address);
11973 %}
11974 ins_pipe(ialu_mem_reg);
11975 %}
11976
11977 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11978 %{
11979 predicate(VM_Version::supports_bmi2());
11980 match(Set dst (LShiftI src shift));
11981
11982 format %{ "shlxl $dst, $src, $shift" %}
11983 ins_encode %{
11984 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11985 %}
11986 ins_pipe(ialu_reg_reg);
11987 %}
11988
11989 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11990 %{
11991 predicate(VM_Version::supports_bmi2());
11992 match(Set dst (LShiftI (LoadI src) shift));
11993 ins_cost(175);
11994 format %{ "shlxl $dst, $src, $shift" %}
11995 ins_encode %{
11996 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11997 %}
11998 ins_pipe(ialu_reg_mem);
11999 %}
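
// With BMI2, shlxl is a three-operand shift that takes its count from any
// general register rather than cl and does not modify the flags, hence no
// rcx_RegI constraint and no KILL cr. The sarx/shrx patterns below follow
// the same scheme.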
12000
12001 // Arithmetic Shift Right by 8-bit immediate
12002 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12003 %{
12004 predicate(!UseAPX);
12005 match(Set dst (RShiftI dst shift));
12006 effect(KILL cr);
12007
12008 format %{ "sarl $dst, $shift" %}
12009 ins_encode %{
12010 __ sarl($dst$$Register, $shift$$constant);
12011 %}
12012 ins_pipe(ialu_mem_imm);
12013 %}
12014
12015 // Arithmetic Shift Right by 8-bit immediate
12016 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12017 %{
12018 predicate(UseAPX);
12019 match(Set dst (RShiftI src shift));
12020 effect(KILL cr);
12021 flag(PD::Flag_ndd_demotable_opr1);
12022
12023 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12024 ins_encode %{
12025 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12026 %}
12027 ins_pipe(ialu_mem_imm);
12028 %}
12029
12030 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12031 %{
12032 predicate(UseAPX);
12033 match(Set dst (RShiftI (LoadI src) shift));
12034 effect(KILL cr);
12035
12036 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12037 ins_encode %{
12038 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12039 %}
12040 ins_pipe(ialu_mem_imm);
12041 %}
12042
12043 // Arithmetic Shift Right by 8-bit immediate
12044 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12045 %{
12046 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12047 effect(KILL cr);
12048
12049 format %{ "sarl $dst, $shift" %}
12050 ins_encode %{
12051 __ sarl($dst$$Address, $shift$$constant);
12052 %}
12053 ins_pipe(ialu_mem_imm);
12054 %}
12055
12056 // Arithmetic Shift Right by variable
12057 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12058 %{
12059 predicate(!VM_Version::supports_bmi2());
12060 match(Set dst (RShiftI dst shift));
12061 effect(KILL cr);
12062
12063 format %{ "sarl $dst, $shift" %}
12064 ins_encode %{
12065 __ sarl($dst$$Register);
12066 %}
12067 ins_pipe(ialu_reg_reg);
12068 %}
12069
12070 // Arithmetic Shift Right by variable
12071 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12072 %{
12073 predicate(!VM_Version::supports_bmi2());
12074 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12075 effect(KILL cr);
12076
12077 format %{ "sarl $dst, $shift" %}
12078 ins_encode %{
12079 __ sarl($dst$$Address);
12080 %}
12081 ins_pipe(ialu_mem_reg);
12082 %}
12083
12084 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12085 %{
12086 predicate(VM_Version::supports_bmi2());
12087 match(Set dst (RShiftI src shift));
12088
12089 format %{ "sarxl $dst, $src, $shift" %}
12090 ins_encode %{
12091 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12092 %}
12093 ins_pipe(ialu_reg_reg);
12094 %}
12095
12096 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12097 %{
12098 predicate(VM_Version::supports_bmi2());
12099 match(Set dst (RShiftI (LoadI src) shift));
12100 ins_cost(175);
12101 format %{ "sarxl $dst, $src, $shift" %}
12102 ins_encode %{
12103 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12104 %}
12105 ins_pipe(ialu_reg_mem);
12106 %}
12107
12108 // Logical Shift Right by 8-bit immediate
12109 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12110 %{
12111 predicate(!UseAPX);
12112 match(Set dst (URShiftI dst shift));
12113 effect(KILL cr);
12114
12115 format %{ "shrl $dst, $shift" %}
12116 ins_encode %{
12117 __ shrl($dst$$Register, $shift$$constant);
12118 %}
12119 ins_pipe(ialu_reg);
12120 %}
12121
12122 // Logical Shift Right by 8-bit immediate
12123 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12124 %{
12125 predicate(UseAPX);
12126 match(Set dst (URShiftI src shift));
12127 effect(KILL cr);
12128 flag(PD::Flag_ndd_demotable_opr1);
12129
12130 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12131 ins_encode %{
12132 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12133 %}
12134 ins_pipe(ialu_reg);
12135 %}
12136
12137 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12138 %{
12139 predicate(UseAPX);
12140 match(Set dst (URShiftI (LoadI src) shift));
12141 effect(KILL cr);
12142
12143 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12144 ins_encode %{
12145 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12146 %}
12147 ins_pipe(ialu_reg);
12148 %}
12149
12150 // Logical Shift Right by 8-bit immediate
12151 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12152 %{
12153 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12154 effect(KILL cr);
12155
12156 format %{ "shrl $dst, $shift" %}
12157 ins_encode %{
12158 __ shrl($dst$$Address, $shift$$constant);
12159 %}
12160 ins_pipe(ialu_mem_imm);
12161 %}
12162
12163 // Logical Shift Right by variable
12164 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12165 %{
12166 predicate(!VM_Version::supports_bmi2());
12167 match(Set dst (URShiftI dst shift));
12168 effect(KILL cr);
12169
12170 format %{ "shrl $dst, $shift" %}
12171 ins_encode %{
12172 __ shrl($dst$$Register);
12173 %}
12174 ins_pipe(ialu_reg_reg);
12175 %}
12176
12177 // Logical Shift Right by variable
12178 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12179 %{
12180 predicate(!VM_Version::supports_bmi2());
12181 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12182 effect(KILL cr);
12183
12184 format %{ "shrl $dst, $shift" %}
12185 ins_encode %{
12186 __ shrl($dst$$Address);
12187 %}
12188 ins_pipe(ialu_mem_reg);
12189 %}
12190
12191 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12192 %{
12193 predicate(VM_Version::supports_bmi2());
12194 match(Set dst (URShiftI src shift));
12195
12196 format %{ "shrxl $dst, $src, $shift" %}
12197 ins_encode %{
12198 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12199 %}
12200 ins_pipe(ialu_reg_reg);
12201 %}
12202
12203 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12204 %{
12205 predicate(VM_Version::supports_bmi2());
12206 match(Set dst (URShiftI (LoadI src) shift));
12207 ins_cost(175);
12208 format %{ "shrxl $dst, $src, $shift" %}
12209 ins_encode %{
12210 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12211 %}
12212 ins_pipe(ialu_reg_mem);
12213 %}
12214
12215 // Long Shift Instructions
12216 // Shift Left by one, two, three
12217 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12218 %{
12219 predicate(!UseAPX);
12220 match(Set dst (LShiftL dst shift));
12221 effect(KILL cr);
12222
12223 format %{ "salq $dst, $shift" %}
12224 ins_encode %{
12225 __ salq($dst$$Register, $shift$$constant);
12226 %}
12227 ins_pipe(ialu_reg);
12228 %}
12229
12230 // Shift Left by one, two, three
12231 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12232 %{
12233 predicate(UseAPX);
12234 match(Set dst (LShiftL src shift));
12235 effect(KILL cr);
12236 flag(PD::Flag_ndd_demotable_opr1);
12237
12238 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12239 ins_encode %{
12240 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12241 %}
12242 ins_pipe(ialu_reg);
12243 %}
12244
12245 // Shift Left by 8-bit immediate
12246 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12247 %{
12248 predicate(!UseAPX);
12249 match(Set dst (LShiftL dst shift));
12250 effect(KILL cr);
12251
12252 format %{ "salq $dst, $shift" %}
12253 ins_encode %{
12254 __ salq($dst$$Register, $shift$$constant);
12255 %}
12256 ins_pipe(ialu_reg);
12257 %}
12258
12259 // Shift Left by 8-bit immediate
12260 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12261 %{
12262 predicate(UseAPX);
12263 match(Set dst (LShiftL src shift));
12264 effect(KILL cr);
12265 flag(PD::Flag_ndd_demotable_opr1);
12266
12267 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12268 ins_encode %{
12269 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12270 %}
12271 ins_pipe(ialu_reg);
12272 %}
12273
12274 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12275 %{
12276 predicate(UseAPX);
12277 match(Set dst (LShiftL (LoadL src) shift));
12278 effect(KILL cr);
12279
12280 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12281 ins_encode %{
12282 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12283 %}
12284 ins_pipe(ialu_reg);
12285 %}
12286
12287 // Shift Left by 8-bit immediate
12288 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12289 %{
12290 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12291 effect(KILL cr);
12292
12293 format %{ "salq $dst, $shift" %}
12294 ins_encode %{
12295 __ salq($dst$$Address, $shift$$constant);
12296 %}
12297 ins_pipe(ialu_mem_imm);
12298 %}
12299
12300 // Shift Left by variable
12301 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12302 %{
12303 predicate(!VM_Version::supports_bmi2());
12304 match(Set dst (LShiftL dst shift));
12305 effect(KILL cr);
12306
12307 format %{ "salq $dst, $shift" %}
12308 ins_encode %{
12309 __ salq($dst$$Register);
12310 %}
12311 ins_pipe(ialu_reg_reg);
12312 %}
12313
12314 // Shift Left by variable
12315 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12316 %{
12317 predicate(!VM_Version::supports_bmi2());
12318 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12319 effect(KILL cr);
12320
12321 format %{ "salq $dst, $shift" %}
12322 ins_encode %{
12323 __ salq($dst$$Address);
12324 %}
12325 ins_pipe(ialu_mem_reg);
12326 %}
12327
12328 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12329 %{
12330 predicate(VM_Version::supports_bmi2());
12331 match(Set dst (LShiftL src shift));
12332
12333 format %{ "shlxq $dst, $src, $shift" %}
12334 ins_encode %{
12335 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12336 %}
12337 ins_pipe(ialu_reg_reg);
12338 %}
12339
12340 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12341 %{
12342 predicate(VM_Version::supports_bmi2());
12343 match(Set dst (LShiftL (LoadL src) shift));
12344 ins_cost(175);
12345 format %{ "shlxq $dst, $src, $shift" %}
12346 ins_encode %{
12347 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12348 %}
12349 ins_pipe(ialu_reg_mem);
12350 %}
12351
12352 // Arithmetic Shift Right by immediate (the encoder masks the count to 0..63)
12353 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12354 %{
12355 predicate(!UseAPX);
12356 match(Set dst (RShiftL dst shift));
12357 effect(KILL cr);
12358
12359 format %{ "sarq $dst, $shift" %}
12360 ins_encode %{
12361 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12362 %}
12363 ins_pipe(ialu_reg);
12364 %}
12365
12366 // Arithmetic Shift Right by immediate (the encoder masks the count to 0..63)
12367 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12368 %{
12369 predicate(UseAPX);
12370 match(Set dst (RShiftL src shift));
12371 effect(KILL cr);
12372 flag(PD::Flag_ndd_demotable_opr1);
12373
12374 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12375 ins_encode %{
12376 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12377 %}
12378 ins_pipe(ialu_reg);
12379 %}
12380
12381 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12382 %{
12383 predicate(UseAPX);
12384 match(Set dst (RShiftL (LoadL src) shift));
12385 effect(KILL cr);
12386
12387 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12388 ins_encode %{
12389 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12390 %}
12391 ins_pipe(ialu_reg);
12392 %}
12393
12394 // Arithmetic Shift Right by immediate (the encoder masks the count to 0..63)
12395 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12396 %{
12397 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12398 effect(KILL cr);
12399
12400 format %{ "sarq $dst, $shift" %}
12401 ins_encode %{
12402 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12403 %}
12404 ins_pipe(ialu_mem_imm);
12405 %}
12406
12407 // Arithmetic Shift Right by variable
12408 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12409 %{
12410 predicate(!VM_Version::supports_bmi2());
12411 match(Set dst (RShiftL dst shift));
12412 effect(KILL cr);
12413
12414 format %{ "sarq $dst, $shift" %}
12415 ins_encode %{
12416 __ sarq($dst$$Register);
12417 %}
12418 ins_pipe(ialu_reg_reg);
12419 %}
12420
12421 // Arithmetic Shift Right by variable
12422 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12423 %{
12424 predicate(!VM_Version::supports_bmi2());
12425 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12426 effect(KILL cr);
12427
12428 format %{ "sarq $dst, $shift" %}
12429 ins_encode %{
12430 __ sarq($dst$$Address);
12431 %}
12432 ins_pipe(ialu_mem_reg);
12433 %}
12434
12435 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12436 %{
12437 predicate(VM_Version::supports_bmi2());
12438 match(Set dst (RShiftL src shift));
12439
12440 format %{ "sarxq $dst, $src, $shift" %}
12441 ins_encode %{
12442 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12443 %}
12444 ins_pipe(ialu_reg_reg);
12445 %}
12446
12447 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12448 %{
12449 predicate(VM_Version::supports_bmi2());
12450 match(Set dst (RShiftL (LoadL src) shift));
12451 ins_cost(175);
12452 format %{ "sarxq $dst, $src, $shift" %}
12453 ins_encode %{
12454 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12455 %}
12456 ins_pipe(ialu_reg_mem);
12457 %}
12458
12459 // Logical Shift Right by 8-bit immediate
12460 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12461 %{
12462 predicate(!UseAPX);
12463 match(Set dst (URShiftL dst shift));
12464 effect(KILL cr);
12465
12466 format %{ "shrq $dst, $shift" %}
12467 ins_encode %{
12468 __ shrq($dst$$Register, $shift$$constant);
12469 %}
12470 ins_pipe(ialu_reg);
12471 %}
12472
12473 // Logical Shift Right by 8-bit immediate
12474 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12475 %{
12476 predicate(UseAPX);
12477 match(Set dst (URShiftL src shift));
12478 effect(KILL cr);
12479 flag(PD::Flag_ndd_demotable_opr1);
12480
12481 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12482 ins_encode %{
12483 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12484 %}
12485 ins_pipe(ialu_reg);
12486 %}
12487
12488 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12489 %{
12490 predicate(UseAPX);
12491 match(Set dst (URShiftL (LoadL src) shift));
12492 effect(KILL cr);
12493
12494 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12495 ins_encode %{
12496 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12497 %}
12498 ins_pipe(ialu_reg);
12499 %}
12500
12501 // Logical Shift Right by 8-bit immediate
12502 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12503 %{
12504 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12505 effect(KILL cr);
12506
12507 format %{ "shrq $dst, $shift" %}
12508 ins_encode %{
12509 __ shrq($dst$$Address, $shift$$constant);
12510 %}
12511 ins_pipe(ialu_mem_imm);
12512 %}
12513
12514 // Logical Shift Right by variable
12515 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12516 %{
12517 predicate(!VM_Version::supports_bmi2());
12518 match(Set dst (URShiftL dst shift));
12519 effect(KILL cr);
12520
12521 format %{ "shrq $dst, $shift" %}
12522 ins_encode %{
12523 __ shrq($dst$$Register);
12524 %}
12525 ins_pipe(ialu_reg_reg);
12526 %}
12527
12528 // Logical Shift Right by variable
12529 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12530 %{
12531 predicate(!VM_Version::supports_bmi2());
12532 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12533 effect(KILL cr);
12534
12535 format %{ "shrq $dst, $shift" %}
12536 ins_encode %{
12537 __ shrq($dst$$Address);
12538 %}
12539 ins_pipe(ialu_mem_reg);
12540 %}
12541
12542 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12543 %{
12544 predicate(VM_Version::supports_bmi2());
12545 match(Set dst (URShiftL src shift));
12546
12547 format %{ "shrxq $dst, $src, $shift" %}
12548 ins_encode %{
12549 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12550 %}
12551 ins_pipe(ialu_reg_reg);
12552 %}
12553
12554 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12555 %{
12556 predicate(VM_Version::supports_bmi2());
12557 match(Set dst (URShiftL (LoadL src) shift));
12558 ins_cost(175);
12559 format %{ "shrxq $dst, $src, $shift" %}
12560 ins_encode %{
12561 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12562 %}
12563 ins_pipe(ialu_reg_mem);
12564 %}
12565
12566 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12567 // This idiom is used by the compiler for the i2b bytecode.
12568 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12569 %{
12570 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12571
12572 format %{ "movsbl $dst, $src\t# i2b" %}
12573 ins_encode %{
12574 __ movsbl($dst$$Register, $src$$Register);
12575 %}
12576 ins_pipe(ialu_reg_reg);
12577 %}
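
// For illustration (a sketch, not part of the original rules): javac's
//   byte b = (byte) x;       // bytecode: i2b
// reaches the matcher as (RShiftI (LShiftI x 24) 24), which the rule above
// collapses into one sign-extending byte move instead of two shifts.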
12578
12579 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12580 // This idiom is used by the compiler for the i2s bytecode.
12581 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12582 %{
12583 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12584
12585 format %{ "movswl $dst, $src\t# i2s" %}
12586 ins_encode %{
12587 __ movswl($dst$$Register, $src$$Register);
12588 %}
12589 ins_pipe(ialu_reg_reg);
12590 %}
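
// Likewise, (short) x arrives as (RShiftI (LShiftI x 16) 16) and becomes a
// single movswl.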
12591
12592 // ROL/ROR instructions
12593
12594 // Rotate left by constant.
12595 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12596 %{
12597 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12598 match(Set dst (RotateLeft dst shift));
12599 effect(KILL cr);
12600 format %{ "roll $dst, $shift" %}
12601 ins_encode %{
12602 __ roll($dst$$Register, $shift$$constant);
12603 %}
12604 ins_pipe(ialu_reg);
12605 %}
12606
12607 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12608 %{
12609 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12610 match(Set dst (RotateLeft src shift));
12611 format %{ "rolxl $dst, $src, $shift" %}
12612 ins_encode %{
12613 int shift = 32 - ($shift$$constant & 31);
12614 __ rorxl($dst$$Register, $src$$Register, shift);
12615 %}
12616 ins_pipe(ialu_reg_reg);
12617 %}
12618
12619 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12620 %{
12621 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12622 match(Set dst (RotateLeft (LoadI src) shift));
12623 ins_cost(175);
12624 format %{ "rolxl $dst, $src, $shift" %}
12625 ins_encode %{
12626 int shift = 32 - ($shift$$constant & 31);
12627 __ rorxl($dst$$Register, $src$$Address, shift);
12628 %}
12629 ins_pipe(ialu_reg_mem);
12630 %}
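
// BMI2 has no rotate-left counterpart of rorx, so the two rules above
// synthesize it from the identity rol(x, n) == ror(x, (32 - n) & 31); the
// rolL rules below do the same with a 64 - n count.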
12631
12632 // Rotate Left by variable
12633 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12634 %{
12635 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12636 match(Set dst (RotateLeft dst shift));
12637 effect(KILL cr);
12638 format %{ "roll $dst, $shift" %}
12639 ins_encode %{
12640 __ roll($dst$$Register);
12641 %}
12642 ins_pipe(ialu_reg_reg);
12643 %}
12644
12645 // Rotate Left by variable
12646 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12647 %{
12648 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12649 match(Set dst (RotateLeft src shift));
12650 effect(KILL cr);
12651 flag(PD::Flag_ndd_demotable_opr1);
12652
12653 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12654 ins_encode %{
12655 __ eroll($dst$$Register, $src$$Register, false);
12656 %}
12657 ins_pipe(ialu_reg_reg);
12658 %}
12659
12660 // Rotate Right by constant.
12661 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12662 %{
12663 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12664 match(Set dst (RotateRight dst shift));
12665 effect(KILL cr);
12666 format %{ "rorl $dst, $shift" %}
12667 ins_encode %{
12668 __ rorl($dst$$Register, $shift$$constant);
12669 %}
12670 ins_pipe(ialu_reg);
12671 %}
12672
12673 // Rotate Right by constant.
12674 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12675 %{
12676 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12677 match(Set dst (RotateRight src shift));
12678 format %{ "rorxl $dst, $src, $shift" %}
12679 ins_encode %{
12680 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12681 %}
12682 ins_pipe(ialu_reg_reg);
12683 %}
12684
12685 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12686 %{
12687 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12688 match(Set dst (RotateRight (LoadI src) shift));
12689 ins_cost(175);
12690 format %{ "rorxl $dst, $src, $shift" %}
12691 ins_encode %{
12692 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12693 %}
12694 ins_pipe(ialu_reg_mem);
12695 %}
12696
12697 // Rotate Right by variable
12698 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12699 %{
12700 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12701 match(Set dst (RotateRight dst shift));
12702 effect(KILL cr);
12703 format %{ "rorl $dst, $shift" %}
12704 ins_encode %{
12705 __ rorl($dst$$Register);
12706 %}
12707 ins_pipe(ialu_reg_reg);
12708 %}
12709
12710 // Rotate Right by variable
12711 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12712 %{
12713 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12714 match(Set dst (RotateRight src shift));
12715 effect(KILL cr);
12716 flag(PD::Flag_ndd_demotable_opr1);
12717
12718 format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12719 ins_encode %{
12720 __ erorl($dst$$Register, $src$$Register, false);
12721 %}
12722 ins_pipe(ialu_reg_reg);
12723 %}
12724
12725 // Rotate Left by constant.
12726 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12727 %{
12728 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12729 match(Set dst (RotateLeft dst shift));
12730 effect(KILL cr);
12731 format %{ "rolq $dst, $shift" %}
12732 ins_encode %{
12733 __ rolq($dst$$Register, $shift$$constant);
12734 %}
12735 ins_pipe(ialu_reg);
12736 %}
12737
12738 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12739 %{
12740 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12741 match(Set dst (RotateLeft src shift));
12742 format %{ "rolxq $dst, $src, $shift" %}
12743 ins_encode %{
12744 int shift = 64 - ($shift$$constant & 63);
12745 __ rorxq($dst$$Register, $src$$Register, shift);
12746 %}
12747 ins_pipe(ialu_reg_reg);
12748 %}
12749
12750 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12751 %{
12752 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12753 match(Set dst (RotateLeft (LoadL src) shift));
12754 ins_cost(175);
12755 format %{ "rolxq $dst, $src, $shift" %}
12756 ins_encode %{
12757 int shift = 64 - ($shift$$constant & 63);
12758 __ rorxq($dst$$Register, $src$$Address, shift);
12759 %}
12760 ins_pipe(ialu_reg_mem);
12761 %}
12762
12763 // Rotate Left by variable
12764 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12765 %{
12766 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12767 match(Set dst (RotateLeft dst shift));
12768 effect(KILL cr);
12769
12770 format %{ "rolq $dst, $shift" %}
12771 ins_encode %{
12772 __ rolq($dst$$Register);
12773 %}
12774 ins_pipe(ialu_reg_reg);
12775 %}
12776
12777 // Rotate Left by variable
12778 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12779 %{
12780 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12781 match(Set dst (RotateLeft src shift));
12782 effect(KILL cr);
12783 flag(PD::Flag_ndd_demotable_opr1);
12784
12785 format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12786 ins_encode %{
12787 __ erolq($dst$$Register, $src$$Register, false);
12788 %}
12789 ins_pipe(ialu_reg_reg);
12790 %}
12791
12792 // Rotate Right by constant.
12793 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12794 %{
12795 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12796 match(Set dst (RotateRight dst shift));
12797 effect(KILL cr);
12798 format %{ "rorq $dst, $shift" %}
12799 ins_encode %{
12800 __ rorq($dst$$Register, $shift$$constant);
12801 %}
12802 ins_pipe(ialu_reg);
12803 %}
12804
12805 // Rotate Right by constant
12806 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12807 %{
12808 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12809 match(Set dst (RotateRight src shift));
12810 format %{ "rorxq $dst, $src, $shift" %}
12811 ins_encode %{
12812 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12813 %}
12814 ins_pipe(ialu_reg_reg);
12815 %}
12816
12817 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12818 %{
12819 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12820 match(Set dst (RotateRight (LoadL src) shift));
12821 ins_cost(175);
12822 format %{ "rorxq $dst, $src, $shift" %}
12823 ins_encode %{
12824 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12825 %}
12826 ins_pipe(ialu_reg_mem);
12827 %}
12828
12829 // Rotate Right by variable
12830 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12831 %{
12832 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12833 match(Set dst (RotateRight dst shift));
12834 effect(KILL cr);
12835 format %{ "rorq $dst, $shift" %}
12836 ins_encode %{
12837 __ rorq($dst$$Register);
12838 %}
12839 ins_pipe(ialu_reg_reg);
12840 %}
12841
12842 // Rotate Right by variable
12843 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12844 %{
12845 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12846 match(Set dst (RotateRight src shift));
12847 effect(KILL cr);
12848 flag(PD::Flag_ndd_demotable_opr1);
12849
12850 format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12851 ins_encode %{
12852 __ erorq($dst$$Register, $src$$Register, false);
12853 %}
12854 ins_pipe(ialu_reg_reg);
12855 %}
12856
12857 //----------------------------- CompressBits/ExpandBits ------------------------
12858
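// Semantics of the two BMI2 instructions below (illustrative bit patterns,
// not from the original source): pext gathers the src bits selected by mask
// into the low-order bits of dst; pdep scatters the low-order bits of src to
// the bit positions set in mask.
//   pext(src=0b10110100, mask=0b11110000) == 0b00001011
//   pdep(src=0b00001011, mask=0b11110000) == 0b10110000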
12859 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12860 predicate(n->bottom_type()->isa_long());
12861 match(Set dst (CompressBits src mask));
12862 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12863 ins_encode %{
12864 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12865 %}
12866 ins_pipe( pipe_slow );
12867 %}
12868
12869 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12870 predicate(n->bottom_type()->isa_long());
12871 match(Set dst (ExpandBits src mask));
12872 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12873 ins_encode %{
12874 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12875 %}
12876 ins_pipe( pipe_slow );
12877 %}
12878
12879 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12880 predicate(n->bottom_type()->isa_long());
12881 match(Set dst (CompressBits src (LoadL mask)));
12882 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12883 ins_encode %{
12884 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12885 %}
12886 ins_pipe( pipe_slow );
12887 %}
12888
12889 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12890 predicate(n->bottom_type()->isa_long());
12891 match(Set dst (ExpandBits src (LoadL mask)));
12892 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12893 ins_encode %{
12894 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12895 %}
12896 ins_pipe( pipe_slow );
12897 %}
12898
12899
12900 // Logical Instructions
12901
12902 // Integer Logical Instructions
12903
12904 // And Instructions
12905 // And Register with Register
12906 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12907 %{
12908 predicate(!UseAPX);
12909 match(Set dst (AndI dst src));
12910 effect(KILL cr);
12911 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12912
12913 format %{ "andl $dst, $src\t# int" %}
12914 ins_encode %{
12915 __ andl($dst$$Register, $src$$Register);
12916 %}
12917 ins_pipe(ialu_reg_reg);
12918 %}
12919
12920 // And Register with Register using New Data Destination (NDD)
12921 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12922 %{
12923 predicate(UseAPX);
12924 match(Set dst (AndI src1 src2));
12925 effect(KILL cr);
12926 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12927
12928 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12929 ins_encode %{
12930 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12932 %}
12933 ins_pipe(ialu_reg_reg);
12934 %}
12935
12936 // And Register with Immediate 255
12937 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12938 %{
12939 match(Set dst (AndI src mask));
12940
12941 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12942 ins_encode %{
12943 __ movzbl($dst$$Register, $src$$Register);
12944 %}
12945 ins_pipe(ialu_reg);
12946 %}
12947
12948 // And Register with Immediate 255 and promote to long
12949 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12950 %{
12951 match(Set dst (ConvI2L (AndI src mask)));
12952
12953 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12954 ins_encode %{
12955 __ movzbl($dst$$Register, $src$$Register);
12956 %}
12957 ins_pipe(ialu_reg);
12958 %}
12959
12960 // And Register with Immediate 65535
12961 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12962 %{
12963 match(Set dst (AndI src mask));
12964
12965 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12966 ins_encode %{
12967 __ movzwl($dst$$Register, $src$$Register);
12968 %}
12969 ins_pipe(ialu_reg);
12970 %}
12971
12972 // And Register with Immediate 65535 and promote to long
12973 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12974 %{
12975 match(Set dst (ConvI2L (AndI src mask)));
12976
12977 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12978 ins_encode %{
12979 __ movzwl($dst$$Register, $src$$Register);
12980 %}
12981 ins_pipe(ialu_reg);
12982 %}
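
// The four movz rules above rely on x & 0xFF being exactly a zero-extension
// of the low byte (and x & 0xFFFF of the low word); because a 32-bit movzbl
// or movzwl also clears the upper half of the 64-bit register, the ConvI2L
// variants need no extra instruction.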
12983
12984 // Can skip int2long conversions after AND with small bitmask
12985 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12986 %{
12987 predicate(VM_Version::supports_bmi2());
12988 ins_cost(125);
12989 effect(TEMP tmp, KILL cr);
12990 match(Set dst (ConvI2L (AndI src mask)));
12991 format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12992 ins_encode %{
12993 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12994 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12995 %}
12996 ins_pipe(ialu_reg_reg);
12997 %}
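
// Worked example for the bzhiq rule (illustrative): with mask == 0xFF,
// exact_log2(0xFF + 1) == 8, so the encoder emits
//   movl  tmp, 8
//   bzhiq dst, src, tmp    // zero bits 63..8 of src into dst
// doing the AND and the int-to-long zero-extension in a single step.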
12998
12999 // And Register with Immediate
13000 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13001 %{
13002 predicate(!UseAPX);
13003 match(Set dst (AndI dst src));
13004 effect(KILL cr);
13005 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13006
13007 format %{ "andl $dst, $src\t# int" %}
13008 ins_encode %{
13009 __ andl($dst$$Register, $src$$constant);
13010 %}
13011 ins_pipe(ialu_reg);
13012 %}
13013
13014 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13015 %{
13016 predicate(UseAPX);
13017 match(Set dst (AndI src1 src2));
13018 effect(KILL cr);
13019 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13020
13021 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13022 ins_encode %{
13023 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13024 %}
13025 ins_pipe(ialu_reg);
13026 %}
13027
13028 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13029 %{
13030 predicate(UseAPX);
13031 match(Set dst (AndI (LoadI src1) src2));
13032 effect(KILL cr);
13033 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13034
13035 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13036 ins_encode %{
13037 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13038 %}
13039 ins_pipe(ialu_reg);
13040 %}
13041
13042 // And Register with Memory
13043 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13044 %{
13045 predicate(!UseAPX);
13046 match(Set dst (AndI dst (LoadI src)));
13047 effect(KILL cr);
13048 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13049
13050 ins_cost(150);
13051 format %{ "andl $dst, $src\t# int" %}
13052 ins_encode %{
13053 __ andl($dst$$Register, $src$$Address);
13054 %}
13055 ins_pipe(ialu_reg_mem);
13056 %}
13057
13058 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13059 %{
13060 predicate(UseAPX);
13061 match(Set dst (AndI src1 (LoadI src2)));
13062 effect(KILL cr);
13063 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13064
13065 ins_cost(150);
13066 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13067 ins_encode %{
13068 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13069 %}
13070 ins_pipe(ialu_reg_mem);
13071 %}
13072
13073 // And Memory with Register
13074 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13075 %{
13076 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13077 effect(KILL cr);
13078 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13079
13080 ins_cost(150);
13081 format %{ "andb $dst, $src\t# byte" %}
13082 ins_encode %{
13083 __ andb($dst$$Address, $src$$Register);
13084 %}
13085 ins_pipe(ialu_mem_reg);
13086 %}
13087
13088 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13089 %{
13090 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13091 effect(KILL cr);
13092 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13093
13094 ins_cost(150);
13095 format %{ "andl $dst, $src\t# int" %}
13096 ins_encode %{
13097 __ andl($dst$$Address, $src$$Register);
13098 %}
13099 ins_pipe(ialu_mem_reg);
13100 %}
13101
13102 // And Memory with Immediate
13103 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13104 %{
13105 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13106 effect(KILL cr);
13107 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13108
13109 ins_cost(125);
13110 format %{ "andl $dst, $src\t# int" %}
13111 ins_encode %{
13112 __ andl($dst$$Address, $src$$constant);
13113 %}
13114 ins_pipe(ialu_mem_imm);
13115 %}
13116
13117 // BMI1 instructions
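// The rules below match the identities that BMI1 implements directly
// (illustrative values, with x == 0b1011000):
//   andn(a, b) == ~a & b
//   blsi(x)    == x & -x       // isolate lowest set bit  -> 0b0001000
//   blsmsk(x)  == x ^ (x - 1)  // mask through lowest bit -> 0b0001111
//   blsr(x)    == x & (x - 1)  // clear lowest set bit    -> 0b1010000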
13118 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13119 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13120 predicate(UseBMI1Instructions);
13121 effect(KILL cr);
13122 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13123
13124 ins_cost(125);
13125 format %{ "andnl $dst, $src1, $src2" %}
13126
13127 ins_encode %{
13128 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13129 %}
13130 ins_pipe(ialu_reg_mem);
13131 %}
13132
13133 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13134 match(Set dst (AndI (XorI src1 minus_1) src2));
13135 predicate(UseBMI1Instructions);
13136 effect(KILL cr);
13137 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13138
13139 format %{ "andnl $dst, $src1, $src2" %}
13140
13141 ins_encode %{
13142 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13143 %}
13144 ins_pipe(ialu_reg);
13145 %}
13146
13147 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13148 match(Set dst (AndI (SubI imm_zero src) src));
13149 predicate(UseBMI1Instructions);
13150 effect(KILL cr);
13151 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13152
13153 format %{ "blsil $dst, $src" %}
13154
13155 ins_encode %{
13156 __ blsil($dst$$Register, $src$$Register);
13157 %}
13158 ins_pipe(ialu_reg);
13159 %}
13160
13161 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13162 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13163 predicate(UseBMI1Instructions);
13164 effect(KILL cr);
13165 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13166
13167 ins_cost(125);
13168 format %{ "blsil $dst, $src" %}
13169
13170 ins_encode %{
13171 __ blsil($dst$$Register, $src$$Address);
13172 %}
13173 ins_pipe(ialu_reg_mem);
13174 %}
13175
13176 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13177 %{
13178 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13179 predicate(UseBMI1Instructions);
13180 effect(KILL cr);
13181 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13182
13183 ins_cost(125);
13184 format %{ "blsmskl $dst, $src" %}
13185
13186 ins_encode %{
13187 __ blsmskl($dst$$Register, $src$$Address);
13188 %}
13189 ins_pipe(ialu_reg_mem);
13190 %}
13191
13192 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13193 %{
13194 match(Set dst (XorI (AddI src minus_1) src));
13195 predicate(UseBMI1Instructions);
13196 effect(KILL cr);
13197 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13198
13199 format %{ "blsmskl $dst, $src" %}
13200
13201 ins_encode %{
13202 __ blsmskl($dst$$Register, $src$$Register);
13203 %}
13204
13205 ins_pipe(ialu_reg);
13206 %}
13207
13208 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13209 %{
13210 match(Set dst (AndI (AddI src minus_1) src) );
13211 predicate(UseBMI1Instructions);
13212 effect(KILL cr);
13213 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13214
13215 format %{ "blsrl $dst, $src" %}
13216
13217 ins_encode %{
13218 __ blsrl($dst$$Register, $src$$Register);
13219 %}
13220
13221 ins_pipe(ialu_reg);
13222 %}
13223
13224 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13225 %{
13226 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13227 predicate(UseBMI1Instructions);
13228 effect(KILL cr);
13229 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13230
13231 ins_cost(125);
13232 format %{ "blsrl $dst, $src" %}
13233
13234 ins_encode %{
13235 __ blsrl($dst$$Register, $src$$Address);
13236 %}
13237
13238 ins_pipe(ialu_reg_mem);
13239 %}
13240
13241 // Or Instructions
13242 // Or Register with Register
13243 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13244 %{
13245 predicate(!UseAPX);
13246 match(Set dst (OrI dst src));
13247 effect(KILL cr);
13248 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13249
13250 format %{ "orl $dst, $src\t# int" %}
13251 ins_encode %{
13252 __ orl($dst$$Register, $src$$Register);
13253 %}
13254 ins_pipe(ialu_reg_reg);
13255 %}
13256
13257 // Or Register with Register using New Data Destination (NDD)
13258 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13259 %{
13260 predicate(UseAPX);
13261 match(Set dst (OrI src1 src2));
13262 effect(KILL cr);
13263 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13264
13265 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13266 ins_encode %{
13267 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13268 %}
13269 ins_pipe(ialu_reg_reg);
13270 %}
13271
13272 // Or Register with Immediate
13273 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13274 %{
13275 predicate(!UseAPX);
13276 match(Set dst (OrI dst src));
13277 effect(KILL cr);
13278 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13279
13280 format %{ "orl $dst, $src\t# int" %}
13281 ins_encode %{
13282 __ orl($dst$$Register, $src$$constant);
13283 %}
13284 ins_pipe(ialu_reg);
13285 %}
13286
13287 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13288 %{
13289 predicate(UseAPX);
13290 match(Set dst (OrI src1 src2));
13291 effect(KILL cr);
13292 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13293
13294 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13295 ins_encode %{
13296 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13297 %}
13298 ins_pipe(ialu_reg);
13299 %}
13300
13301 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13302 %{
13303 predicate(UseAPX);
13304 match(Set dst (OrI src1 src2));
13305 effect(KILL cr);
13306 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13307
13308 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13309 ins_encode %{
13310 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13311 %}
13312 ins_pipe(ialu_reg);
13313 %}
13314
13315 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13316 %{
13317 predicate(UseAPX);
13318 match(Set dst (OrI (LoadI src1) src2));
13319 effect(KILL cr);
13320 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13321
13322 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13323 ins_encode %{
13324 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13325 %}
13326 ins_pipe(ialu_reg);
13327 %}
13328
13329 // Or Register with Memory
13330 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13331 %{
13332 predicate(!UseAPX);
13333 match(Set dst (OrI dst (LoadI src)));
13334 effect(KILL cr);
13335 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13336
13337 ins_cost(150);
13338 format %{ "orl $dst, $src\t# int" %}
13339 ins_encode %{
13340 __ orl($dst$$Register, $src$$Address);
13341 %}
13342 ins_pipe(ialu_reg_mem);
13343 %}
13344
13345 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13346 %{
13347 predicate(UseAPX);
13348 match(Set dst (OrI src1 (LoadI src2)));
13349 effect(KILL cr);
13350 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13351
13352 ins_cost(150);
13353 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13354 ins_encode %{
13355 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13356 %}
13357 ins_pipe(ialu_reg_mem);
13358 %}
13359
13360 // Or Memory with Register
13361 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13362 %{
13363 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13364 effect(KILL cr);
13365 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13366
13367 ins_cost(150);
13368 format %{ "orb $dst, $src\t# byte" %}
13369 ins_encode %{
13370 __ orb($dst$$Address, $src$$Register);
13371 %}
13372 ins_pipe(ialu_mem_reg);
13373 %}
13374
13375 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13376 %{
13377 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13378 effect(KILL cr);
13379 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13380
13381 ins_cost(150);
13382 format %{ "orl $dst, $src\t# int" %}
13383 ins_encode %{
13384 __ orl($dst$$Address, $src$$Register);
13385 %}
13386 ins_pipe(ialu_mem_reg);
13387 %}
13388
13389 // Or Memory with Immediate
13390 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13391 %{
13392 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13393 effect(KILL cr);
13394 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13395
13396 ins_cost(125);
13397 format %{ "orl $dst, $src\t# int" %}
13398 ins_encode %{
13399 __ orl($dst$$Address, $src$$constant);
13400 %}
13401 ins_pipe(ialu_mem_imm);
13402 %}
13403
13404 // Xor Instructions
13405 // Xor Register with Register
13406 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13407 %{
13408 predicate(!UseAPX);
13409 match(Set dst (XorI dst src));
13410 effect(KILL cr);
13411 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13412
13413 format %{ "xorl $dst, $src\t# int" %}
13414 ins_encode %{
13415 __ xorl($dst$$Register, $src$$Register);
13416 %}
13417 ins_pipe(ialu_reg_reg);
13418 %}
13419
13420 // Xor Register with Register using New Data Destination (NDD)
13421 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13422 %{
13423 predicate(UseAPX);
13424 match(Set dst (XorI src1 src2));
13425 effect(KILL cr);
13426 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13427
13428 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13429 ins_encode %{
13430 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13431 %}
13432 ins_pipe(ialu_reg_reg);
13433 %}
13434
13435 // Xor Register with Immediate -1
13436 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13437 %{
13438 predicate(!UseAPX);
13439 match(Set dst (XorI dst imm));
13440
13441 format %{ "notl $dst" %}
13442 ins_encode %{
13443 __ notl($dst$$Register);
13444 %}
13445 ins_pipe(ialu_reg);
13446 %}
13447
13448 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13449 %{
13450 match(Set dst (XorI src imm));
13451 predicate(UseAPX);
13452 flag(PD::Flag_ndd_demotable_opr1);
13453
13454 format %{ "enotl $dst, $src" %}
13455 ins_encode %{
13456 __ enotl($dst$$Register, $src$$Register);
13457 %}
13458 ins_pipe(ialu_reg);
13459 %}
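
// Both rules above exploit x ^ -1 == ~x, strength-reducing the xor to a
// not and dropping the immediate operand altogether.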
13460
13461 // Xor Register with Immediate
13462 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13463 %{
13464 // Strict predicate check so that xorI_rReg_im1 is always selected when immI src is -1, regardless of cost.
13465 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13466 match(Set dst (XorI dst src));
13467 effect(KILL cr);
13468 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13469
13470 format %{ "xorl $dst, $src\t# int" %}
13471 ins_encode %{
13472 __ xorl($dst$$Register, $src$$constant);
13473 %}
13474 ins_pipe(ialu_reg);
13475 %}
13476
13477 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13478 %{
13479 // Strict predicate check so that xorI_rReg_im1_ndd is always selected when immI src2 is -1, regardless of cost.
13480 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13481 match(Set dst (XorI src1 src2));
13482 effect(KILL cr);
13483 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13484
13485 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13486 ins_encode %{
13487 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13488 %}
13489 ins_pipe(ialu_reg);
13490 %}
13491
13492 // Xor Memory with Immediate
13493 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13494 %{
13495 predicate(UseAPX);
13496 match(Set dst (XorI (LoadI src1) src2));
13497 effect(KILL cr);
13498 ins_cost(150);
13499 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13500
13501 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13502 ins_encode %{
13503 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13504 %}
13505 ins_pipe(ialu_reg);
13506 %}
13507
13508 // Xor Register with Memory
13509 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13510 %{
13511 predicate(!UseAPX);
13512 match(Set dst (XorI dst (LoadI src)));
13513 effect(KILL cr);
13514 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515
13516 ins_cost(150);
13517 format %{ "xorl $dst, $src\t# int" %}
13518 ins_encode %{
13519 __ xorl($dst$$Register, $src$$Address);
13520 %}
13521 ins_pipe(ialu_reg_mem);
13522 %}
13523
13524 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13525 %{
13526 predicate(UseAPX);
13527 match(Set dst (XorI src1 (LoadI src2)));
13528 effect(KILL cr);
13529 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13530
13531 ins_cost(150);
13532 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13533 ins_encode %{
13534 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13535 %}
13536 ins_pipe(ialu_reg_mem);
13537 %}
13538
13539 // Xor Memory with Register
13540 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13541 %{
13542 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13543 effect(KILL cr);
13544 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13545
13546 ins_cost(150);
13547 format %{ "xorb $dst, $src\t# byte" %}
13548 ins_encode %{
13549 __ xorb($dst$$Address, $src$$Register);
13550 %}
13551 ins_pipe(ialu_mem_reg);
13552 %}
13553
13554 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13555 %{
13556 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13557 effect(KILL cr);
13558 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13559
13560 ins_cost(150);
13561 format %{ "xorl $dst, $src\t# int" %}
13562 ins_encode %{
13563 __ xorl($dst$$Address, $src$$Register);
13564 %}
13565 ins_pipe(ialu_mem_reg);
13566 %}
13567
13568 // Xor Memory with Immediate
13569 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13570 %{
13571 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13572 effect(KILL cr);
13573 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13574
13575 ins_cost(125);
13576 format %{ "xorl $dst, $src\t# int" %}
13577 ins_encode %{
13578 __ xorl($dst$$Address, $src$$constant);
13579 %}
13580 ins_pipe(ialu_mem_imm);
13581 %}
13582
13583
13584 // Long Logical Instructions
13585
13586 // And Instructions
13587 // And Register with Register
13588 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13589 %{
13590 predicate(!UseAPX);
13591 match(Set dst (AndL dst src));
13592 effect(KILL cr);
13593 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13594
13595 format %{ "andq $dst, $src\t# long" %}
13596 ins_encode %{
13597 __ andq($dst$$Register, $src$$Register);
13598 %}
13599 ins_pipe(ialu_reg_reg);
13600 %}
13601
13602 // And Register with Register using New Data Destination (NDD)
13603 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13604 %{
13605 predicate(UseAPX);
13606 match(Set dst (AndL src1 src2));
13607 effect(KILL cr);
13608 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13609
13610 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13611 ins_encode %{
13612 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13614 %}
13615 ins_pipe(ialu_reg_reg);
13616 %}
13617
13618 // And Register with Immediate 255
13619 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13620 %{
13621 match(Set dst (AndL src mask));
13622
13623 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13624 ins_encode %{
13625 // movzbl zeroes out the upper 32-bit and does not need REX.W
13626 __ movzbl($dst$$Register, $src$$Register);
13627 %}
13628 ins_pipe(ialu_reg);
13629 %}
13630
13631 // And Register with Immediate 65535
13632 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13633 %{
13634 match(Set dst (AndL src mask));
13635
13636 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13637 ins_encode %{
13638 // movzwl zeroes out the upper 32-bit and does not need REX.W
13639 __ movzwl($dst$$Register, $src$$Register);
13640 %}
13641 ins_pipe(ialu_reg);
13642 %}
13643
13644 // And Register with Immediate
13645 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13646 %{
13647 predicate(!UseAPX);
13648 match(Set dst (AndL dst src));
13649 effect(KILL cr);
13650 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13651
13652 format %{ "andq $dst, $src\t# long" %}
13653 ins_encode %{
13654 __ andq($dst$$Register, $src$$constant);
13655 %}
13656 ins_pipe(ialu_reg);
13657 %}
13658
13659 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13660 %{
13661 predicate(UseAPX);
13662 match(Set dst (AndL src1 src2));
13663 effect(KILL cr);
13664 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13665
13666 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13667 ins_encode %{
13668 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13669 %}
13670 ins_pipe(ialu_reg);
13671 %}
13672
13673 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13674 %{
13675 predicate(UseAPX);
13676 match(Set dst (AndL (LoadL src1) src2));
13677 effect(KILL cr);
13678 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13679
13680 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13681 ins_encode %{
13682 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13683 %}
13684 ins_pipe(ialu_reg);
13685 %}
13686
13687 // And Register with Memory
13688 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13689 %{
13690 predicate(!UseAPX);
13691 match(Set dst (AndL dst (LoadL src)));
13692 effect(KILL cr);
13693 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13694
13695 ins_cost(150);
13696 format %{ "andq $dst, $src\t# long" %}
13697 ins_encode %{
13698 __ andq($dst$$Register, $src$$Address);
13699 %}
13700 ins_pipe(ialu_reg_mem);
13701 %}
13702
13703 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13704 %{
13705 predicate(UseAPX);
13706 match(Set dst (AndL src1 (LoadL src2)));
13707 effect(KILL cr);
13708 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13709
13710 ins_cost(150);
13711 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13712 ins_encode %{
13713 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13714 %}
13715 ins_pipe(ialu_reg_mem);
13716 %}
13717
13718 // And Memory with Register
13719 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13720 %{
13721 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13722 effect(KILL cr);
13723 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13724
13725 ins_cost(150);
13726 format %{ "andq $dst, $src\t# long" %}
13727 ins_encode %{
13728 __ andq($dst$$Address, $src$$Register);
13729 %}
13730 ins_pipe(ialu_mem_reg);
13731 %}
13732
13733 // And Memory with Immediate
13734 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13735 %{
13736 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13737 effect(KILL cr);
13738 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13739
13740 ins_cost(125);
13741 format %{ "andq $dst, $src\t# long" %}
13742 ins_encode %{
13743 __ andq($dst$$Address, $src$$constant);
13744 %}
13745 ins_pipe(ialu_mem_imm);
13746 %}
13747
13748 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13749 %{
13750 // con must be a genuinely 64-bit immediate whose complement is a power of 2,
13751 // since for 8/32-bit immediates a plain AND/OR is at least as cheap.
13752 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13753
13754 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13755 effect(KILL cr);
13756
13757 ins_cost(125);
13758 format %{ "btrq $dst, log2(not($con))\t# long" %}
13759 ins_encode %{
13760 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13761 %}
13762 ins_pipe(ialu_mem_imm);
13763 %}
13764
13765 // BMI1 instructions
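// A quick reference for the identities these rules match (read off the
// ideal subtrees below):
//   andn:   dst = ~src1 & src2
//   blsi:   dst = src & -src        (isolate lowest set bit)
//   blsmsk: dst = src ^ (src - 1)   (mask up to lowest set bit)
//   blsr:   dst = src & (src - 1)   (clear lowest set bit)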
13766 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13767 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13768 predicate(UseBMI1Instructions);
13769 effect(KILL cr);
13770 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13771
13772 ins_cost(125);
13773 format %{ "andnq $dst, $src1, $src2" %}
13774
13775 ins_encode %{
13776 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13777 %}
13778 ins_pipe(ialu_reg_mem);
13779 %}
13780
13781 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13782 match(Set dst (AndL (XorL src1 minus_1) src2));
13783 predicate(UseBMI1Instructions);
13784 effect(KILL cr);
13785 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786
13787 format %{ "andnq $dst, $src1, $src2" %}
13788
13789 ins_encode %{
13790 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13791 %}
13792 ins_pipe(ialu_reg_mem);
13793 %}
13794
13795 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13796 match(Set dst (AndL (SubL imm_zero src) src));
13797 predicate(UseBMI1Instructions);
13798 effect(KILL cr);
13799 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13800
13801 format %{ "blsiq $dst, $src" %}
13802
13803 ins_encode %{
13804 __ blsiq($dst$$Register, $src$$Register);
13805 %}
13806 ins_pipe(ialu_reg);
13807 %}
13808
13809 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13810 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13811 predicate(UseBMI1Instructions);
13812 effect(KILL cr);
13813 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13814
13815 ins_cost(125);
13816 format %{ "blsiq $dst, $src" %}
13817
13818 ins_encode %{
13819 __ blsiq($dst$$Register, $src$$Address);
13820 %}
13821 ins_pipe(ialu_reg_mem);
13822 %}
13823
13824 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13825 %{
13826 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13827 predicate(UseBMI1Instructions);
13828 effect(KILL cr);
13829 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13830
13831 ins_cost(125);
13832 format %{ "blsmskq $dst, $src" %}
13833
13834 ins_encode %{
13835 __ blsmskq($dst$$Register, $src$$Address);
13836 %}
13837 ins_pipe(ialu_reg_mem);
13838 %}
13839
13840 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13841 %{
13842 match(Set dst (XorL (AddL src minus_1) src));
13843 predicate(UseBMI1Instructions);
13844 effect(KILL cr);
13845 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13846
13847 format %{ "blsmskq $dst, $src" %}
13848
13849 ins_encode %{
13850 __ blsmskq($dst$$Register, $src$$Register);
13851 %}
13852
13853 ins_pipe(ialu_reg);
13854 %}
13855
13856 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13857 %{
13858 match(Set dst (AndL (AddL src minus_1) src) );
13859 predicate(UseBMI1Instructions);
13860 effect(KILL cr);
13861 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13862
13863 format %{ "blsrq $dst, $src" %}
13864
13865 ins_encode %{
13866 __ blsrq($dst$$Register, $src$$Register);
13867 %}
13868
13869 ins_pipe(ialu_reg);
13870 %}
13871
13872 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13873 %{
13874 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13875 predicate(UseBMI1Instructions);
13876 effect(KILL cr);
13877 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13878
13879 ins_cost(125);
13880 format %{ "blsrq $dst, $src" %}
13881
13882 ins_encode %{
13883 __ blsrq($dst$$Register, $src$$Address);
13884 %}
13885
13886 ins_pipe(ialu_reg);
13887 %}
13888
13889 // Or Instructions
13890 // Or Register with Register
13891 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13892 %{
13893 predicate(!UseAPX);
13894 match(Set dst (OrL dst src));
13895 effect(KILL cr);
13896 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13897
13898 format %{ "orq $dst, $src\t# long" %}
13899 ins_encode %{
13900 __ orq($dst$$Register, $src$$Register);
13901 %}
13902 ins_pipe(ialu_reg_reg);
13903 %}
13904
13905 // Or Register with Register using New Data Destination (NDD)
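// The APX NDD form takes a separate destination, so the result does not have
// to overwrite a source register; the Flag_ndd_demotable_opr* hints mark the
// source operands that, when allocated to the same register as dst, allow
// demotion back to the shorter legacy two-operand encoding (as the flag
// names suggest).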
13906 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13907 %{
13908 predicate(UseAPX);
13909 match(Set dst (OrL src1 src2));
13910 effect(KILL cr);
13911 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13912
13913 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13914 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13918 ins_pipe(ialu_reg_reg);
13919 %}
13920
13921 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13924 effect(KILL cr);
13925 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13926
13927 format %{ "orq $dst, $src\t# long" %}
13928 ins_encode %{
13929 __ orq($dst$$Register, $src$$Register);
13930 %}
13931 ins_pipe(ialu_reg_reg);
13932 %}
13933
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13936 effect(KILL cr);
13937 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13938
13939 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13940 ins_encode %{
13941 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13942 %}
13943 ins_pipe(ialu_reg_reg);
13944 %}
13945
13946 // Or Register with Immediate
13947 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13948 %{
13949 predicate(!UseAPX);
13950 match(Set dst (OrL dst src));
13951 effect(KILL cr);
13952 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13953
13954 format %{ "orq $dst, $src\t# long" %}
13955 ins_encode %{
13956 __ orq($dst$$Register, $src$$constant);
13957 %}
13958 ins_pipe(ialu_reg);
13959 %}
13960
13961 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13962 %{
13963 predicate(UseAPX);
13964 match(Set dst (OrL src1 src2));
13965 effect(KILL cr);
13966 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13967
13968 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13969 ins_encode %{
13970 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13971 %}
13972 ins_pipe(ialu_reg);
13973 %}
13974
13975 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13976 %{
13977 predicate(UseAPX);
13978 match(Set dst (OrL src1 src2));
13979 effect(KILL cr);
13980 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13981
13982 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13983 ins_encode %{
13984 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13985 %}
13986 ins_pipe(ialu_reg);
13987 %}
13988
// Or Memory with Immediate into Register (NDD)
13990 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13991 %{
13992 predicate(UseAPX);
13993 match(Set dst (OrL (LoadL src1) src2));
13994 effect(KILL cr);
13995 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13996
13997 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13998 ins_encode %{
13999 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14000 %}
14001 ins_pipe(ialu_reg);
14002 %}
14003
14004 // Or Register with Memory
14005 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14006 %{
14007 predicate(!UseAPX);
14008 match(Set dst (OrL dst (LoadL src)));
14009 effect(KILL cr);
14010 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14011
14012 ins_cost(150);
14013 format %{ "orq $dst, $src\t# long" %}
14014 ins_encode %{
14015 __ orq($dst$$Register, $src$$Address);
14016 %}
14017 ins_pipe(ialu_reg_mem);
14018 %}
14019
14020 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14021 %{
14022 predicate(UseAPX);
14023 match(Set dst (OrL src1 (LoadL src2)));
14024 effect(KILL cr);
14025 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14026
14027 ins_cost(150);
14028 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14029 ins_encode %{
14030 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14031 %}
14032 ins_pipe(ialu_reg_mem);
14033 %}
14034
14035 // Or Memory with Register
14036 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14037 %{
14038 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14039 effect(KILL cr);
14040 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14041
14042 ins_cost(150);
14043 format %{ "orq $dst, $src\t# long" %}
14044 ins_encode %{
14045 __ orq($dst$$Address, $src$$Register);
14046 %}
14047 ins_pipe(ialu_mem_reg);
14048 %}
14049
14050 // Or Memory with Immediate
14051 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14052 %{
14053 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14054 effect(KILL cr);
14055 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056
14057 ins_cost(125);
14058 format %{ "orq $dst, $src\t# long" %}
14059 ins_encode %{
14060 __ orq($dst$$Address, $src$$constant);
14061 %}
14062 ins_pipe(ialu_mem_imm);
14063 %}
14064
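// OR with a single-bit constant sets exactly one bit, so the memory form
// maps to btsq (bit test and set) with bit index log2(con).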
14065 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14066 %{
14067 // con should be a pure 64-bit power of 2 immediate
14068 // because AND/OR works well enough for 8/32-bit values.
14069 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14070
14071 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14072 effect(KILL cr);
14073
14074 ins_cost(125);
14075 format %{ "btsq $dst, log2($con)\t# long" %}
14076 ins_encode %{
14077 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14078 %}
14079 ins_pipe(ialu_mem_imm);
14080 %}
14081
14082 // Xor Instructions
14083 // Xor Register with Register
14084 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14085 %{
14086 predicate(!UseAPX);
14087 match(Set dst (XorL dst src));
14088 effect(KILL cr);
14089 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14090
14091 format %{ "xorq $dst, $src\t# long" %}
14092 ins_encode %{
14093 __ xorq($dst$$Register, $src$$Register);
14094 %}
14095 ins_pipe(ialu_reg_reg);
14096 %}
14097
14098 // Xor Register with Register using New Data Destination (NDD)
14099 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14100 %{
14101 predicate(UseAPX);
14102 match(Set dst (XorL src1 src2));
14103 effect(KILL cr);
14104 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14105
14106 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14107 ins_encode %{
14108 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14109 %}
14110 ins_pipe(ialu_reg_reg);
14111 %}
14112
14113 // Xor Register with Immediate -1
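// (x ^ -1) == ~x, so this strength-reduces to a single not instruction.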
14114 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14115 %{
14116 predicate(!UseAPX);
14117 match(Set dst (XorL dst imm));
14118
14119 format %{ "notq $dst" %}
14120 ins_encode %{
14121 __ notq($dst$$Register);
14122 %}
14123 ins_pipe(ialu_reg);
14124 %}
14125
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14127 %{
14128 predicate(UseAPX);
14129 match(Set dst (XorL src imm));
14130 flag(PD::Flag_ndd_demotable_opr1);
14131
14132 format %{ "enotq $dst, $src" %}
14133 ins_encode %{
14134 __ enotq($dst$$Register, $src$$Register);
14135 %}
14136 ins_pipe(ialu_reg);
14137 %}
14138
14139 // Xor Register with Immediate
14140 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14141 %{
  // Strict predicate check ensures xorL_rReg_im1 is selected, regardless of cost, when immL32 src is -1.
14143 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14144 match(Set dst (XorL dst src));
14145 effect(KILL cr);
14146 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14147
14148 format %{ "xorq $dst, $src\t# long" %}
14149 ins_encode %{
14150 __ xorq($dst$$Register, $src$$constant);
14151 %}
14152 ins_pipe(ialu_reg);
14153 %}
14154
14155 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14156 %{
  // Strict predicate check ensures xorL_rReg_im1_ndd is selected, regardless of cost, when immL32 src2 is -1.
14158 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14159 match(Set dst (XorL src1 src2));
14160 effect(KILL cr);
14161 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14162
14163 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14164 ins_encode %{
14165 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14166 %}
14167 ins_pipe(ialu_reg);
14168 %}
14169
// Xor Memory with Immediate into Register (NDD)
14171 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14172 %{
14173 predicate(UseAPX);
14174 match(Set dst (XorL (LoadL src1) src2));
14175 effect(KILL cr);
14176 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14177 ins_cost(150);
14178
14179 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14180 ins_encode %{
14181 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14182 %}
14183 ins_pipe(ialu_reg);
14184 %}
14185
14186 // Xor Register with Memory
14187 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14188 %{
14189 predicate(!UseAPX);
14190 match(Set dst (XorL dst (LoadL src)));
14191 effect(KILL cr);
14192 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14193
14194 ins_cost(150);
14195 format %{ "xorq $dst, $src\t# long" %}
14196 ins_encode %{
14197 __ xorq($dst$$Register, $src$$Address);
14198 %}
14199 ins_pipe(ialu_reg_mem);
14200 %}
14201
14202 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14203 %{
14204 predicate(UseAPX);
14205 match(Set dst (XorL src1 (LoadL src2)));
14206 effect(KILL cr);
14207 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14208
14209 ins_cost(150);
14210 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14211 ins_encode %{
14212 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14213 %}
14214 ins_pipe(ialu_reg_mem);
14215 %}
14216
14217 // Xor Memory with Register
14218 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14219 %{
14220 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14221 effect(KILL cr);
14222 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14223
14224 ins_cost(150);
14225 format %{ "xorq $dst, $src\t# long" %}
14226 ins_encode %{
14227 __ xorq($dst$$Address, $src$$Register);
14228 %}
14229 ins_pipe(ialu_mem_reg);
14230 %}
14231
14232 // Xor Memory with Immediate
14233 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14234 %{
14235 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14236 effect(KILL cr);
14237 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14238
14239 ins_cost(125);
14240 format %{ "xorq $dst, $src\t# long" %}
14241 ins_encode %{
14242 __ xorq($dst$$Address, $src$$constant);
14243 %}
14244 ins_pipe(ialu_mem_imm);
14245 %}
14246
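// cmpLTMask computes an all-ones mask from a signed compare:
//   dst = (p < q) ? -1 : 0
// (setcc yields 0/1; negl turns that into 0/-1).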
14247 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14248 %{
14249 match(Set dst (CmpLTMask p q));
14250 effect(KILL cr);
14251
14252 ins_cost(400);
14253 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14254 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14255 "negl $dst" %}
14256 ins_encode %{
14257 __ cmpl($p$$Register, $q$$Register);
14258 __ setcc(Assembler::less, $dst$$Register);
14259 __ negl($dst$$Register);
14260 %}
14261 ins_pipe(pipe_slow);
14262 %}
14263
14264 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14265 %{
14266 match(Set dst (CmpLTMask dst zero));
14267 effect(KILL cr);
14268
14269 ins_cost(100);
14270 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14271 ins_encode %{
14272 __ sarl($dst$$Register, 31);
14273 %}
14274 ins_pipe(ialu_reg);
14275 %}
14276
14277 /* Better to save a register than avoid a branch */
14278 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14279 %{
14280 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14281 effect(KILL cr);
14282 ins_cost(300);
14283 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14284 "jge done\n\t"
14285 "addl $p,$y\n"
14286 "done: " %}
14287 ins_encode %{
14288 Register Rp = $p$$Register;
14289 Register Rq = $q$$Register;
14290 Register Ry = $y$$Register;
14291 Label done;
14292 __ subl(Rp, Rq);
14293 __ jccb(Assembler::greaterEqual, done);
14294 __ addl(Rp, Ry);
14295 __ bind(done);
14296 %}
14297 ins_pipe(pipe_cmplt);
14298 %}
14299
14300 /* Better to save a register than avoid a branch */
14301 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14302 %{
14303 match(Set y (AndI (CmpLTMask p q) y));
14304 effect(KILL cr);
14305
14306 ins_cost(300);
14307
14308 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14309 "jlt done\n\t"
14310 "xorl $y, $y\n"
14311 "done: " %}
14312 ins_encode %{
14313 Register Rp = $p$$Register;
14314 Register Rq = $q$$Register;
14315 Register Ry = $y$$Register;
14316 Label done;
14317 __ cmpl(Rp, Rq);
14318 __ jccb(Assembler::less, done);
14319 __ xorl(Ry, Ry);
14320 __ bind(done);
14321 %}
14322 ins_pipe(pipe_cmplt);
14323 %}
14324
14325
14326 //---------- FP Instructions------------------------------------------------
14327
14328 // Really expensive, avoid
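// The pushfq/andq/popfq fixup below rewrites the saved flags when ucomiss
// reports an unordered result, so a NaN operand ends up comparing as
// "below"; the cheaper UCF variants that follow skip this fixup.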
14329 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14330 %{
14331 match(Set cr (CmpF src1 src2));
14332
14333 ins_cost(500);
14334 format %{ "ucomiss $src1, $src2\n\t"
14335 "jnp,s exit\n\t"
14336 "pushfq\t# saw NaN, set CF\n\t"
14337 "andq [rsp], #0xffffff2b\n\t"
14338 "popfq\n"
14339 "exit:" %}
14340 ins_encode %{
14341 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14342 emit_cmpfp_fixup(masm);
14343 %}
14344 ins_pipe(pipe_slow);
14345 %}
14346
14347 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14348 match(Set cr (CmpF src1 src2));
14349
14350 ins_cost(100);
14351 format %{ "ucomiss $src1, $src2" %}
14352 ins_encode %{
14353 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14354 %}
14355 ins_pipe(pipe_slow);
14356 %}
14357
14358 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14359 match(Set cr (CmpF src1 (LoadF src2)));
14360
14361 ins_cost(100);
14362 format %{ "ucomiss $src1, $src2" %}
14363 ins_encode %{
14364 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14365 %}
14366 ins_pipe(pipe_slow);
14367 %}
14368
14369 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14370 match(Set cr (CmpF src con));
14371 ins_cost(100);
14372 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14373 ins_encode %{
14374 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14375 %}
14376 ins_pipe(pipe_slow);
14377 %}
14378
14379 // Really expensive, avoid
14380 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14381 %{
14382 match(Set cr (CmpD src1 src2));
14383
14384 ins_cost(500);
14385 format %{ "ucomisd $src1, $src2\n\t"
14386 "jnp,s exit\n\t"
14387 "pushfq\t# saw NaN, set CF\n\t"
14388 "andq [rsp], #0xffffff2b\n\t"
14389 "popfq\n"
14390 "exit:" %}
14391 ins_encode %{
14392 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14393 emit_cmpfp_fixup(masm);
14394 %}
14395 ins_pipe(pipe_slow);
14396 %}
14397
14398 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14399 match(Set cr (CmpD src1 src2));
14400
14401 ins_cost(100);
14402 format %{ "ucomisd $src1, $src2 test" %}
14403 ins_encode %{
14404 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14405 %}
14406 ins_pipe(pipe_slow);
14407 %}
14408
14409 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14410 match(Set cr (CmpD src1 (LoadD src2)));
14411
14412 ins_cost(100);
14413 format %{ "ucomisd $src1, $src2" %}
14414 ins_encode %{
14415 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14416 %}
14417 ins_pipe(pipe_slow);
14418 %}
14419
14420 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14421 match(Set cr (CmpD src con));
14422 ins_cost(100);
14423 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14424 ins_encode %{
14425 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14426 %}
14427 ins_pipe(pipe_slow);
14428 %}
14429
14430 // Compare into -1,0,1
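// Three-way compare, read off the format below:
// unordered (NaN) or less -> -1, equal -> 0, greater -> +1.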
14431 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14432 %{
14433 match(Set dst (CmpF3 src1 src2));
14434 effect(KILL cr);
14435
14436 ins_cost(275);
14437 format %{ "ucomiss $src1, $src2\n\t"
14438 "movl $dst, #-1\n\t"
14439 "jp,s done\n\t"
14440 "jb,s done\n\t"
14441 "setne $dst\n\t"
14442 "movzbl $dst, $dst\n"
14443 "done:" %}
14444 ins_encode %{
14445 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14446 emit_cmpfp3(masm, $dst$$Register);
14447 %}
14448 ins_pipe(pipe_slow);
14449 %}
14450
14451 // Compare into -1,0,1
14452 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14453 %{
14454 match(Set dst (CmpF3 src1 (LoadF src2)));
14455 effect(KILL cr);
14456
14457 ins_cost(275);
14458 format %{ "ucomiss $src1, $src2\n\t"
14459 "movl $dst, #-1\n\t"
14460 "jp,s done\n\t"
14461 "jb,s done\n\t"
14462 "setne $dst\n\t"
14463 "movzbl $dst, $dst\n"
14464 "done:" %}
14465 ins_encode %{
14466 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14467 emit_cmpfp3(masm, $dst$$Register);
14468 %}
14469 ins_pipe(pipe_slow);
14470 %}
14471
14472 // Compare into -1,0,1
14473 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14474 match(Set dst (CmpF3 src con));
14475 effect(KILL cr);
14476
14477 ins_cost(275);
14478 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14479 "movl $dst, #-1\n\t"
14480 "jp,s done\n\t"
14481 "jb,s done\n\t"
14482 "setne $dst\n\t"
14483 "movzbl $dst, $dst\n"
14484 "done:" %}
14485 ins_encode %{
14486 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14487 emit_cmpfp3(masm, $dst$$Register);
14488 %}
14489 ins_pipe(pipe_slow);
14490 %}
14491
14492 // Compare into -1,0,1
14493 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14494 %{
14495 match(Set dst (CmpD3 src1 src2));
14496 effect(KILL cr);
14497
14498 ins_cost(275);
14499 format %{ "ucomisd $src1, $src2\n\t"
14500 "movl $dst, #-1\n\t"
14501 "jp,s done\n\t"
14502 "jb,s done\n\t"
14503 "setne $dst\n\t"
14504 "movzbl $dst, $dst\n"
14505 "done:" %}
14506 ins_encode %{
14507 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14508 emit_cmpfp3(masm, $dst$$Register);
14509 %}
14510 ins_pipe(pipe_slow);
14511 %}
14512
14513 // Compare into -1,0,1
14514 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14515 %{
14516 match(Set dst (CmpD3 src1 (LoadD src2)));
14517 effect(KILL cr);
14518
14519 ins_cost(275);
14520 format %{ "ucomisd $src1, $src2\n\t"
14521 "movl $dst, #-1\n\t"
14522 "jp,s done\n\t"
14523 "jb,s done\n\t"
14524 "setne $dst\n\t"
14525 "movzbl $dst, $dst\n"
14526 "done:" %}
14527 ins_encode %{
14528 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14529 emit_cmpfp3(masm, $dst$$Register);
14530 %}
14531 ins_pipe(pipe_slow);
14532 %}
14533
14534 // Compare into -1,0,1
14535 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14536 match(Set dst (CmpD3 src con));
14537 effect(KILL cr);
14538
14539 ins_cost(275);
14540 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14541 "movl $dst, #-1\n\t"
14542 "jp,s done\n\t"
14543 "jb,s done\n\t"
14544 "setne $dst\n\t"
14545 "movzbl $dst, $dst\n"
14546 "done:" %}
14547 ins_encode %{
14548 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14549 emit_cmpfp3(masm, $dst$$Register);
14550 %}
14551 ins_pipe(pipe_slow);
14552 %}
14553
14554 //----------Arithmetic Conversion Instructions---------------------------------
14555
14556 instruct convF2D_reg_reg(regD dst, regF src)
14557 %{
14558 match(Set dst (ConvF2D src));
14559
14560 format %{ "cvtss2sd $dst, $src" %}
14561 ins_encode %{
14562 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14563 %}
14564 ins_pipe(pipe_slow); // XXX
14565 %}
14566
14567 instruct convF2D_reg_mem(regD dst, memory src)
14568 %{
14569 predicate(UseAVX == 0);
14570 match(Set dst (ConvF2D (LoadF src)));
14571
14572 format %{ "cvtss2sd $dst, $src" %}
14573 ins_encode %{
14574 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14575 %}
14576 ins_pipe(pipe_slow); // XXX
14577 %}
14578
14579 instruct convD2F_reg_reg(regF dst, regD src)
14580 %{
14581 match(Set dst (ConvD2F src));
14582
14583 format %{ "cvtsd2ss $dst, $src" %}
14584 ins_encode %{
14585 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14586 %}
14587 ins_pipe(pipe_slow); // XXX
14588 %}
14589
14590 instruct convD2F_reg_mem(regF dst, memory src)
14591 %{
14592 predicate(UseAVX == 0);
14593 match(Set dst (ConvD2F (LoadD src)));
14594
14595 format %{ "cvtsd2ss $dst, $src" %}
14596 ins_encode %{
14597 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14598 %}
14599 ins_pipe(pipe_slow); // XXX
14600 %}
14601
14602 // XXX do mem variants
14603 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14604 %{
14605 predicate(!VM_Version::supports_avx10_2());
14606 match(Set dst (ConvF2I src));
14607 effect(KILL cr);
14608 format %{ "convert_f2i $dst, $src" %}
14609 ins_encode %{
14610 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14611 %}
14612 ins_pipe(pipe_slow);
14613 %}
14614
14615 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14616 %{
14617 predicate(VM_Version::supports_avx10_2());
14618 match(Set dst (ConvF2I src));
14619 format %{ "evcvttss2sisl $dst, $src" %}
14620 ins_encode %{
14621 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14622 %}
14623 ins_pipe(pipe_slow);
14624 %}
14625
14626 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14627 %{
14628 predicate(VM_Version::supports_avx10_2());
14629 match(Set dst (ConvF2I (LoadF src)));
14630 format %{ "evcvttss2sisl $dst, $src" %}
14631 ins_encode %{
14632 __ evcvttss2sisl($dst$$Register, $src$$Address);
14633 %}
14634 ins_pipe(pipe_slow);
14635 %}
14636
14637 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14638 %{
14639 predicate(!VM_Version::supports_avx10_2());
14640 match(Set dst (ConvF2L src));
14641 effect(KILL cr);
14642 format %{ "convert_f2l $dst, $src"%}
14643 ins_encode %{
14644 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14645 %}
14646 ins_pipe(pipe_slow);
14647 %}
14648
14649 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14650 %{
14651 predicate(VM_Version::supports_avx10_2());
14652 match(Set dst (ConvF2L src));
14653 format %{ "evcvttss2sisq $dst, $src" %}
14654 ins_encode %{
14655 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14656 %}
14657 ins_pipe(pipe_slow);
14658 %}
14659
14660 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14661 %{
14662 predicate(VM_Version::supports_avx10_2());
14663 match(Set dst (ConvF2L (LoadF src)));
14664 format %{ "evcvttss2sisq $dst, $src" %}
14665 ins_encode %{
14666 __ evcvttss2sisq($dst$$Register, $src$$Address);
14667 %}
14668 ins_pipe(pipe_slow);
14669 %}
14670
14671 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14672 %{
14673 predicate(!VM_Version::supports_avx10_2());
14674 match(Set dst (ConvD2I src));
14675 effect(KILL cr);
14676 format %{ "convert_d2i $dst, $src"%}
14677 ins_encode %{
14678 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14679 %}
14680 ins_pipe(pipe_slow);
14681 %}
14682
14683 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14684 %{
14685 predicate(VM_Version::supports_avx10_2());
14686 match(Set dst (ConvD2I src));
14687 format %{ "evcvttsd2sisl $dst, $src" %}
14688 ins_encode %{
14689 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14690 %}
14691 ins_pipe(pipe_slow);
14692 %}
14693
14694 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14695 %{
14696 predicate(VM_Version::supports_avx10_2());
14697 match(Set dst (ConvD2I (LoadD src)));
14698 format %{ "evcvttsd2sisl $dst, $src" %}
14699 ins_encode %{
14700 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14701 %}
14702 ins_pipe(pipe_slow);
14703 %}
14704
14705 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14706 %{
14707 predicate(!VM_Version::supports_avx10_2());
14708 match(Set dst (ConvD2L src));
14709 effect(KILL cr);
14710 format %{ "convert_d2l $dst, $src"%}
14711 ins_encode %{
14712 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14713 %}
14714 ins_pipe(pipe_slow);
14715 %}
14716
14717 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14718 %{
14719 predicate(VM_Version::supports_avx10_2());
14720 match(Set dst (ConvD2L src));
14721 format %{ "evcvttsd2sisq $dst, $src" %}
14722 ins_encode %{
14723 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14724 %}
14725 ins_pipe(pipe_slow);
14726 %}
14727
14728 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14729 %{
14730 predicate(VM_Version::supports_avx10_2());
14731 match(Set dst (ConvD2L (LoadD src)));
14732 format %{ "evcvttsd2sisq $dst, $src" %}
14733 ins_encode %{
14734 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14735 %}
14736 ins_pipe(pipe_slow);
14737 %}
14738
14739 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14740 %{
14741 match(Set dst (RoundD src));
14742 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14743 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14744 ins_encode %{
14745 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14746 %}
14747 ins_pipe(pipe_slow);
14748 %}
14749
14750 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14751 %{
14752 match(Set dst (RoundF src));
14753 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14754 format %{ "round_float $dst,$src" %}
14755 ins_encode %{
14756 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14757 %}
14758 ins_pipe(pipe_slow);
14759 %}
14760
14761 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14762 %{
14763 predicate(!UseXmmI2F);
14764 match(Set dst (ConvI2F src));
14765
14766 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14767 ins_encode %{
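    // The pxor zeroes $dst before the convert: cvtsi2ssl writes only the low
    // lanes, so clearing $dst first avoids a false dependency on its previous
    // contents (the usual partial-register workaround; the i2d/l2f/l2d
    // conversions below do the same).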
14768 if (UseAVX > 0) {
14769 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14770 }
14771 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14772 %}
14773 ins_pipe(pipe_slow); // XXX
14774 %}
14775
14776 instruct convI2F_reg_mem(regF dst, memory src)
14777 %{
14778 predicate(UseAVX == 0);
14779 match(Set dst (ConvI2F (LoadI src)));
14780
14781 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14782 ins_encode %{
14783 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14784 %}
14785 ins_pipe(pipe_slow); // XXX
14786 %}
14787
14788 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14789 %{
14790 predicate(!UseXmmI2D);
14791 match(Set dst (ConvI2D src));
14792
14793 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14794 ins_encode %{
14795 if (UseAVX > 0) {
14796 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14797 }
14798 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14799 %}
14800 ins_pipe(pipe_slow); // XXX
14801 %}
14802
14803 instruct convI2D_reg_mem(regD dst, memory src)
14804 %{
14805 predicate(UseAVX == 0);
14806 match(Set dst (ConvI2D (LoadI src)));
14807
14808 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14809 ins_encode %{
14810 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14811 %}
14812 ins_pipe(pipe_slow); // XXX
14813 %}
14814
14815 instruct convXI2F_reg(regF dst, rRegI src)
14816 %{
14817 predicate(UseXmmI2F);
14818 match(Set dst (ConvI2F src));
14819
14820 format %{ "movdl $dst, $src\n\t"
14821 "cvtdq2psl $dst, $dst\t# i2f" %}
14822 ins_encode %{
14823 __ movdl($dst$$XMMRegister, $src$$Register);
14824 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14825 %}
14826 ins_pipe(pipe_slow); // XXX
14827 %}
14828
14829 instruct convXI2D_reg(regD dst, rRegI src)
14830 %{
14831 predicate(UseXmmI2D);
14832 match(Set dst (ConvI2D src));
14833
14834 format %{ "movdl $dst, $src\n\t"
14835 "cvtdq2pdl $dst, $dst\t# i2d" %}
14836 ins_encode %{
14837 __ movdl($dst$$XMMRegister, $src$$Register);
14838 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14839 %}
14840 ins_pipe(pipe_slow); // XXX
14841 %}
14842
14843 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14844 %{
14845 match(Set dst (ConvL2F src));
14846
14847 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14848 ins_encode %{
14849 if (UseAVX > 0) {
14850 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14851 }
14852 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14853 %}
14854 ins_pipe(pipe_slow); // XXX
14855 %}
14856
14857 instruct convL2F_reg_mem(regF dst, memory src)
14858 %{
14859 predicate(UseAVX == 0);
14860 match(Set dst (ConvL2F (LoadL src)));
14861
14862 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14863 ins_encode %{
14864 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14865 %}
14866 ins_pipe(pipe_slow); // XXX
14867 %}
14868
14869 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14870 %{
14871 match(Set dst (ConvL2D src));
14872
14873 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14874 ins_encode %{
14875 if (UseAVX > 0) {
14876 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14877 }
14878 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14879 %}
14880 ins_pipe(pipe_slow); // XXX
14881 %}
14882
14883 instruct convL2D_reg_mem(regD dst, memory src)
14884 %{
14885 predicate(UseAVX == 0);
14886 match(Set dst (ConvL2D (LoadL src)));
14887
14888 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14889 ins_encode %{
14890 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14891 %}
14892 ins_pipe(pipe_slow); // XXX
14893 %}
14894
14895 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14896 %{
14897 match(Set dst (ConvI2L src));
14898
14899 ins_cost(125);
14900 format %{ "movslq $dst, $src\t# i2l" %}
14901 ins_encode %{
14902 __ movslq($dst$$Register, $src$$Register);
14903 %}
14904 ins_pipe(ialu_reg_reg);
14905 %}
14906
14907 // Zero-extend convert int to long
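// On x86-64 a 32-bit register write implicitly zeroes the upper 32 bits, so
// a plain movl is enough -- and when dst == src no instruction is emitted.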
14908 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14909 %{
14910 match(Set dst (AndL (ConvI2L src) mask));
14911
14912 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14913 ins_encode %{
14914 if ($dst$$reg != $src$$reg) {
14915 __ movl($dst$$Register, $src$$Register);
14916 }
14917 %}
14918 ins_pipe(ialu_reg_reg);
14919 %}
14920
14921 // Zero-extend convert int to long
14922 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14923 %{
14924 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14925
14926 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14927 ins_encode %{
14928 __ movl($dst$$Register, $src$$Address);
14929 %}
14930 ins_pipe(ialu_reg_mem);
14931 %}
14932
14933 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14934 %{
14935 match(Set dst (AndL src mask));
14936
14937 format %{ "movl $dst, $src\t# zero-extend long" %}
14938 ins_encode %{
14939 __ movl($dst$$Register, $src$$Register);
14940 %}
14941 ins_pipe(ialu_reg_reg);
14942 %}
14943
14944 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14945 %{
14946 match(Set dst (ConvL2I src));
14947
14948 format %{ "movl $dst, $src\t# l2i" %}
14949 ins_encode %{
14950 __ movl($dst$$Register, $src$$Register);
14951 %}
14952 ins_pipe(ialu_reg_reg);
14953 %}
14954
14955
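// Raw bit-pattern moves between the FP and integer register files / stack
// slots; no value conversion is performed (these are the patterns behind
// Float.floatToRawIntBits and friends).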
14956 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14957 match(Set dst (MoveF2I src));
14958 effect(DEF dst, USE src);
14959
14960 ins_cost(125);
14961 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14962 ins_encode %{
14963 __ movl($dst$$Register, Address(rsp, $src$$disp));
14964 %}
14965 ins_pipe(ialu_reg_mem);
14966 %}
14967
14968 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14969 match(Set dst (MoveI2F src));
14970 effect(DEF dst, USE src);
14971
14972 ins_cost(125);
14973 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14974 ins_encode %{
14975 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14976 %}
14977 ins_pipe(pipe_slow);
14978 %}
14979
14980 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14981 match(Set dst (MoveD2L src));
14982 effect(DEF dst, USE src);
14983
14984 ins_cost(125);
14985 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14986 ins_encode %{
14987 __ movq($dst$$Register, Address(rsp, $src$$disp));
14988 %}
14989 ins_pipe(ialu_reg_mem);
14990 %}
14991
14992 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14993 predicate(!UseXmmLoadAndClearUpper);
14994 match(Set dst (MoveL2D src));
14995 effect(DEF dst, USE src);
14996
14997 ins_cost(125);
14998 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14999 ins_encode %{
15000 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15001 %}
15002 ins_pipe(pipe_slow);
15003 %}
15004
15005 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15006 predicate(UseXmmLoadAndClearUpper);
15007 match(Set dst (MoveL2D src));
15008 effect(DEF dst, USE src);
15009
15010 ins_cost(125);
15011 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15012 ins_encode %{
15013 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15014 %}
15015 ins_pipe(pipe_slow);
15016 %}
15017
15018
15019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15020 match(Set dst (MoveF2I src));
15021 effect(DEF dst, USE src);
15022
15023 ins_cost(95); // XXX
15024 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15025 ins_encode %{
15026 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15027 %}
15028 ins_pipe(pipe_slow);
15029 %}
15030
15031 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15032 match(Set dst (MoveI2F src));
15033 effect(DEF dst, USE src);
15034
15035 ins_cost(100);
15036 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15037 ins_encode %{
15038 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15039 %}
15040 ins_pipe( ialu_mem_reg );
15041 %}
15042
15043 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15044 match(Set dst (MoveD2L src));
15045 effect(DEF dst, USE src);
15046
15047 ins_cost(95); // XXX
15048 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15049 ins_encode %{
15050 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15051 %}
15052 ins_pipe(pipe_slow);
15053 %}
15054
15055 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15056 match(Set dst (MoveL2D src));
15057 effect(DEF dst, USE src);
15058
15059 ins_cost(100);
15060 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15061 ins_encode %{
15062 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15063 %}
15064 ins_pipe(ialu_mem_reg);
15065 %}
15066
15067 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15068 match(Set dst (MoveF2I src));
15069 effect(DEF dst, USE src);
15070 ins_cost(85);
15071 format %{ "movd $dst,$src\t# MoveF2I" %}
15072 ins_encode %{
15073 __ movdl($dst$$Register, $src$$XMMRegister);
15074 %}
15075 ins_pipe( pipe_slow );
15076 %}
15077
15078 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15079 match(Set dst (MoveD2L src));
15080 effect(DEF dst, USE src);
15081 ins_cost(85);
15082 format %{ "movd $dst,$src\t# MoveD2L" %}
15083 ins_encode %{
15084 __ movdq($dst$$Register, $src$$XMMRegister);
15085 %}
15086 ins_pipe( pipe_slow );
15087 %}
15088
15089 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15090 match(Set dst (MoveI2F src));
15091 effect(DEF dst, USE src);
15092 ins_cost(100);
15093 format %{ "movd $dst,$src\t# MoveI2F" %}
15094 ins_encode %{
15095 __ movdl($dst$$XMMRegister, $src$$Register);
15096 %}
15097 ins_pipe( pipe_slow );
15098 %}
15099
15100 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15101 match(Set dst (MoveL2D src));
15102 effect(DEF dst, USE src);
15103 ins_cost(100);
15104 format %{ "movd $dst,$src\t# MoveL2D" %}
15105 ins_encode %{
15106 __ movdq($dst$$XMMRegister, $src$$Register);
15107 %}
15108 ins_pipe( pipe_slow );
15109 %}
15110
15111 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15113 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15114 Universe dummy, rFlagsReg cr)
15115 %{
15116 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15117 match(Set dummy (ClearArray cnt base));
15118 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15119
15120 format %{ $$template
15121 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15122 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15123 $$emit$$"jg LARGE\n\t"
15124 $$emit$$"dec rcx\n\t"
15125 $$emit$$"js DONE\t# Zero length\n\t"
15126 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15127 $$emit$$"dec rcx\n\t"
15128 $$emit$$"jge LOOP\n\t"
15129 $$emit$$"jmp DONE\n\t"
15130 $$emit$$"# LARGE:\n\t"
15131 if (UseFastStosb) {
15132 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15133 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15134 } else if (UseXMMForObjInit) {
15135 $$emit$$"mov rdi,rax\n\t"
15136 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15137 $$emit$$"jmpq L_zero_64_bytes\n\t"
15138 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15139 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15140 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15141 $$emit$$"add 0x40,rax\n\t"
15142 $$emit$$"# L_zero_64_bytes:\n\t"
15143 $$emit$$"sub 0x8,rcx\n\t"
15144 $$emit$$"jge L_loop\n\t"
15145 $$emit$$"add 0x4,rcx\n\t"
15146 $$emit$$"jl L_tail\n\t"
15147 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15148 $$emit$$"add 0x20,rax\n\t"
15149 $$emit$$"sub 0x4,rcx\n\t"
15150 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15151 $$emit$$"add 0x4,rcx\n\t"
15152 $$emit$$"jle L_end\n\t"
15153 $$emit$$"dec rcx\n\t"
15154 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15155 $$emit$$"vmovq xmm0,(rax)\n\t"
15156 $$emit$$"add 0x8,rax\n\t"
15157 $$emit$$"dec rcx\n\t"
15158 $$emit$$"jge L_sloop\n\t"
15159 $$emit$$"# L_end:\n\t"
15160 } else {
15161 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15162 }
15163 $$emit$$"# DONE"
15164 %}
15165 ins_encode %{
15166 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15167 $tmp$$XMMRegister, false, knoreg);
15168 %}
15169 ins_pipe(pipe_slow);
15170 %}
15171
15172 // Small non-constant length ClearArray for AVX512 targets.
15173 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15174 Universe dummy, rFlagsReg cr)
15175 %{
15176 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15177 match(Set dummy (ClearArray cnt base));
15178 ins_cost(125);
15179 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15180
15181 format %{ $$template
15182 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15183 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15184 $$emit$$"jg LARGE\n\t"
15185 $$emit$$"dec rcx\n\t"
15186 $$emit$$"js DONE\t# Zero length\n\t"
15187 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15188 $$emit$$"dec rcx\n\t"
15189 $$emit$$"jge LOOP\n\t"
15190 $$emit$$"jmp DONE\n\t"
15191 $$emit$$"# LARGE:\n\t"
15192 if (UseFastStosb) {
15193 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15194 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15195 } else if (UseXMMForObjInit) {
15196 $$emit$$"mov rdi,rax\n\t"
15197 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15198 $$emit$$"jmpq L_zero_64_bytes\n\t"
15199 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15201 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15202 $$emit$$"add 0x40,rax\n\t"
15203 $$emit$$"# L_zero_64_bytes:\n\t"
15204 $$emit$$"sub 0x8,rcx\n\t"
15205 $$emit$$"jge L_loop\n\t"
15206 $$emit$$"add 0x4,rcx\n\t"
15207 $$emit$$"jl L_tail\n\t"
15208 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15209 $$emit$$"add 0x20,rax\n\t"
15210 $$emit$$"sub 0x4,rcx\n\t"
15211 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212 $$emit$$"add 0x4,rcx\n\t"
15213 $$emit$$"jle L_end\n\t"
15214 $$emit$$"dec rcx\n\t"
15215 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216 $$emit$$"vmovq xmm0,(rax)\n\t"
15217 $$emit$$"add 0x8,rax\n\t"
15218 $$emit$$"dec rcx\n\t"
15219 $$emit$$"jge L_sloop\n\t"
15220 $$emit$$"# L_end:\n\t"
15221 } else {
15222 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223 }
15224 $$emit$$"# DONE"
15225 %}
15226 ins_encode %{
15227 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15228 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15229 %}
15230 ins_pipe(pipe_slow);
15231 %}
15232
15233 // Large non-constant length ClearArray for non-AVX512 targets.
15234 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15235 Universe dummy, rFlagsReg cr)
15236 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15238 match(Set dummy (ClearArray cnt base));
15239 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15240
15241 format %{ $$template
15242 if (UseFastStosb) {
15243 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15244 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15245 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15246 } else if (UseXMMForObjInit) {
15247 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15248 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15249 $$emit$$"jmpq L_zero_64_bytes\n\t"
15250 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15251 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15252 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15253 $$emit$$"add 0x40,rax\n\t"
15254 $$emit$$"# L_zero_64_bytes:\n\t"
15255 $$emit$$"sub 0x8,rcx\n\t"
15256 $$emit$$"jge L_loop\n\t"
15257 $$emit$$"add 0x4,rcx\n\t"
15258 $$emit$$"jl L_tail\n\t"
15259 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15260 $$emit$$"add 0x20,rax\n\t"
15261 $$emit$$"sub 0x4,rcx\n\t"
15262 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15263 $$emit$$"add 0x4,rcx\n\t"
15264 $$emit$$"jle L_end\n\t"
15265 $$emit$$"dec rcx\n\t"
15266 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15267 $$emit$$"vmovq xmm0,(rax)\n\t"
15268 $$emit$$"add 0x8,rax\n\t"
15269 $$emit$$"dec rcx\n\t"
15270 $$emit$$"jge L_sloop\n\t"
15271 $$emit$$"# L_end:\n\t"
15272 } else {
15273 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15274 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15275 }
15276 %}
15277 ins_encode %{
15278 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15279 $tmp$$XMMRegister, true, knoreg);
15280 %}
15281 ins_pipe(pipe_slow);
15282 %}
15283
15284 // Large non-constant length ClearArray for AVX512 targets.
15285 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15286 Universe dummy, rFlagsReg cr)
15287 %{
15288 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15289 match(Set dummy (ClearArray cnt base));
15290 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291
15292 format %{ $$template
15293 if (UseFastStosb) {
15294 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15295 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15296 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15297 } else if (UseXMMForObjInit) {
15298 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15299 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15300 $$emit$$"jmpq L_zero_64_bytes\n\t"
15301 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15302 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15303 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15304 $$emit$$"add 0x40,rax\n\t"
15305 $$emit$$"# L_zero_64_bytes:\n\t"
15306 $$emit$$"sub 0x8,rcx\n\t"
15307 $$emit$$"jge L_loop\n\t"
15308 $$emit$$"add 0x4,rcx\n\t"
15309 $$emit$$"jl L_tail\n\t"
15310 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15311 $$emit$$"add 0x20,rax\n\t"
15312 $$emit$$"sub 0x4,rcx\n\t"
15313 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15314 $$emit$$"add 0x4,rcx\n\t"
15315 $$emit$$"jle L_end\n\t"
15316 $$emit$$"dec rcx\n\t"
15317 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15318 $$emit$$"vmovq xmm0,(rax)\n\t"
15319 $$emit$$"add 0x8,rax\n\t"
15320 $$emit$$"dec rcx\n\t"
15321 $$emit$$"jge L_sloop\n\t"
15322 $$emit$$"# L_end:\n\t"
15323 } else {
15324 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15325 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15326 }
15327 %}
15328 ins_encode %{
15329 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15330 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15331 %}
15332 ins_pipe(pipe_slow);
15333 %}
15334
15335 // Small constant length ClearArray for AVX512 targets.
15336 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15337 %{
15338 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15339 match(Set dummy (ClearArray cnt base));
15340 ins_cost(100);
15341 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15342 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15343 ins_encode %{
15344 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15345 %}
15346 ins_pipe(pipe_slow);
15347 %}
15348
15349 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15350 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15351 %{
15352 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15353 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15354 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15355
15356 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15357 ins_encode %{
15358 __ string_compare($str1$$Register, $str2$$Register,
15359 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15360 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15361 %}
15362 ins_pipe( pipe_slow );
15363 %}
15364
15365 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15366 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15367 %{
15368 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15369 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15370 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15371
15372 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15373 ins_encode %{
15374 __ string_compare($str1$$Register, $str2$$Register,
15375 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15376 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15377 %}
15378 ins_pipe( pipe_slow );
15379 %}
15380
15381 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15382 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15383 %{
15384 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15385 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15386 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15387
15388 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15389 ins_encode %{
15390 __ string_compare($str1$$Register, $str2$$Register,
15391 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15392 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15393 %}
15394 ins_pipe( pipe_slow );
15395 %}
15396
15397 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15398 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15399 %{
15400 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15401 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15402 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15403
15404 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15405 ins_encode %{
15406 __ string_compare($str1$$Register, $str2$$Register,
15407 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15408 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15409 %}
15410 ins_pipe( pipe_slow );
15411 %}
15412
15413 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15414 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15415 %{
15416 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15417 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15418 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15419
15420 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15421 ins_encode %{
15422 __ string_compare($str1$$Register, $str2$$Register,
15423 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15424 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15425 %}
15426 ins_pipe( pipe_slow );
15427 %}
15428
15429 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15430 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15431 %{
15432 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15433 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15434 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15435
15436 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15437 ins_encode %{
15438 __ string_compare($str1$$Register, $str2$$Register,
15439 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15440 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15441 %}
15442 ins_pipe( pipe_slow );
15443 %}
15444
15445 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15446 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15447 %{
15448 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15449 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15450 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15451
15452 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15453 ins_encode %{
15454 __ string_compare($str2$$Register, $str1$$Register,
15455 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15456 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15457 %}
15458 ins_pipe( pipe_slow );
15459 %}
15460
15461 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15462 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15463 %{
15464 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15465 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15466 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15467
15468 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15469 ins_encode %{
15470 __ string_compare($str2$$Register, $str1$$Register,
15471 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15472 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15473 %}
15474 ins_pipe( pipe_slow );
15475 %}
15476
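// Note on the "_evex" pairs in this section: intrinsics that can exploit an
// AVX-512 opmask come in two flavors. The plain form is predicated on
// !VM_Version::supports_avx512vlbw() and passes the knoreg placeholder; the
// _evex form additionally reserves a kReg TEMP and forwards $ktmp$$KRegister
// to the same macro-assembler routine. (That the helper uses the mask for
// wider, masked vector steps is an assumption here; only the plumbing is
// visible in this file.)
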
15477 // fast search of substring with known size.
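// "Known size" means the substring length is the compile-time constant
// $int_cnt2. The C8 path below is used once the constant substring fills a
// full 16-byte SSE4.2 chunk: >= 16 Latin-1 bytes here (LL), or >= 8 UTF-16
// chars (again 16 bytes) in the UU/UL variants. Smaller constants take the
// general routine, which stages short strings through the stack when they
// would otherwise read across a page boundary.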
15478 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15479 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15480 %{
15481 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15482 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15483 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15484
15485 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15486 ins_encode %{
15487 int icnt2 = (int)$int_cnt2$$constant;
15488 if (icnt2 >= 16) {
15489 // IndexOf for constant substrings with size >= 16 elements
15490 // which don't need to be loaded through stack.
15491 __ string_indexofC8($str1$$Register, $str2$$Register,
15492 $cnt1$$Register, $cnt2$$Register,
15493 icnt2, $result$$Register,
15494 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15495 } else {
15496 // Small strings are loaded through stack if they cross page boundary.
15497 __ string_indexof($str1$$Register, $str2$$Register,
15498 $cnt1$$Register, $cnt2$$Register,
15499 icnt2, $result$$Register,
15500 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15501 }
15502 %}
15503 ins_pipe( pipe_slow );
15504 %}
15505
15506 // fast search of substring with known size.
15507 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15508 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15509 %{
15510 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15511 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15512 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15513
15514 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15515 ins_encode %{
15516 int icnt2 = (int)$int_cnt2$$constant;
15517 if (icnt2 >= 8) {
15518 // IndexOf for constant substrings with size >= 8 elements
15519 // which don't need to be loaded through stack.
15520 __ string_indexofC8($str1$$Register, $str2$$Register,
15521 $cnt1$$Register, $cnt2$$Register,
15522 icnt2, $result$$Register,
15523 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15524 } else {
15525 // Small strings are loaded through stack if they cross page boundary.
15526 __ string_indexof($str1$$Register, $str2$$Register,
15527 $cnt1$$Register, $cnt2$$Register,
15528 icnt2, $result$$Register,
15529 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15530 }
15531 %}
15532 ins_pipe( pipe_slow );
15533 %}
15534
15535 // fast search of substring with known size.
15536 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15537 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15538 %{
15539 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15540 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15541 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15542
15543 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15544 ins_encode %{
15545 int icnt2 = (int)$int_cnt2$$constant;
15546 if (icnt2 >= 8) {
15547 // IndexOf for constant substrings with size >= 8 elements
15548 // which don't need to be loaded through stack.
15549 __ string_indexofC8($str1$$Register, $str2$$Register,
15550 $cnt1$$Register, $cnt2$$Register,
15551 icnt2, $result$$Register,
15552 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15553 } else {
15554 // Small strings are loaded through stack if they cross page boundary.
15555 __ string_indexof($str1$$Register, $str2$$Register,
15556 $cnt1$$Register, $cnt2$$Register,
15557 icnt2, $result$$Register,
15558 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15559 }
15560 %}
15561 ins_pipe( pipe_slow );
15562 %}
15563
15564 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15565 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15566 %{
15567 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15568 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15569 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15570
15571 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15572 ins_encode %{
15573 __ string_indexof($str1$$Register, $str2$$Register,
15574 $cnt1$$Register, $cnt2$$Register,
15575 (-1), $result$$Register,
15576 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15577 %}
15578 ins_pipe( pipe_slow );
15579 %}
15580
15581 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15582 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15583 %{
15584 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15585 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15586 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15587
15588 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15589 ins_encode %{
15590 __ string_indexof($str1$$Register, $str2$$Register,
15591 $cnt1$$Register, $cnt2$$Register,
15592 (-1), $result$$Register,
15593 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15594 %}
15595 ins_pipe( pipe_slow );
15596 %}
15597
15598 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15599 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15600 %{
15601 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15602 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15603 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15604
15605 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15606 ins_encode %{
15607 __ string_indexof($str1$$Register, $str2$$Register,
15608 $cnt1$$Register, $cnt2$$Register,
15609 (-1), $result$$Register,
15610 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15611 %}
15612 ins_pipe( pipe_slow );
15613 %}
15614
15615 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15616 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15617 %{
15618 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15619 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15620 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15621 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15622 ins_encode %{
15623 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15624 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15625 %}
15626 ins_pipe( pipe_slow );
15627 %}
15628
15629 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15630 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15631 %{
15632 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15633 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15634 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15635 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15636 ins_encode %{
15637 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15638 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15639 %}
15640 ins_pipe( pipe_slow );
15641 %}
15642
15643 // fast string equals
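// StrEquals receives the length explicitly in $cnt, so arrays_equals() is
// called with its first argument false; presumably this suppresses the
// array header/length handling that the AryEq rules below (which pass true)
// rely on, leaving only the raw content compare.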
15644 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15645 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15646 %{
15647 predicate(!VM_Version::supports_avx512vlbw());
15648 match(Set result (StrEquals (Binary str1 str2) cnt));
15649 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15650
15651 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15652 ins_encode %{
15653 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15654 $cnt$$Register, $result$$Register, $tmp3$$Register,
15655 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15656 %}
15657 ins_pipe( pipe_slow );
15658 %}
15659
15660 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15661 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15662 %{
15663 predicate(VM_Version::supports_avx512vlbw());
15664 match(Set result (StrEquals (Binary str1 str2) cnt));
15665 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15666
15667 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15668 ins_encode %{
15669 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15670 $cnt$$Register, $result$$Register, $tmp3$$Register,
15671 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15672 %}
15673 ins_pipe( pipe_slow );
15674 %}
15675
15676 // fast array equals
15677 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15678 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15679 %{
15680 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15681 match(Set result (AryEq ary1 ary2));
15682 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15683
15684 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15685 ins_encode %{
15686 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15687 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15688 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15689 %}
15690 ins_pipe( pipe_slow );
15691 %}
15692
15693 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15694 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15695 %{
15696 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15697 match(Set result (AryEq ary1 ary2));
15698 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15699
15700 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15701 ins_encode %{
15702 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15703 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15704 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15705 %}
15706 ins_pipe( pipe_slow );
15707 %}
15708
15709 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15710 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15711 %{
15712 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15713 match(Set result (AryEq ary1 ary2));
15714 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15715
15716 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15717 ins_encode %{
15718 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15719 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15720 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15721 %}
15722 ins_pipe( pipe_slow );
15723 %}
15724
15725 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15726 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15727 %{
15728 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15729 match(Set result (AryEq ary1 ary2));
15730 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15731
15732 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15733 ins_encode %{
15734 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15735 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15736 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15737 %}
15738 ins_pipe( pipe_slow );
15739 %}
15740
15741 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15742 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15743 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15744 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15745 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15746 %{
15747 predicate(UseAVX >= 2);
15748 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15749 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15750 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15751 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15752 USE basic_type, KILL cr);
15753
15754 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15755 ins_encode %{
15756 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15757 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15758 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15759 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15760 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15761 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15762 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15763 %}
15764 ins_pipe( pipe_slow );
15765 %}
15766
15767 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15769 %{
15770 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15771 match(Set result (CountPositives ary1 len));
15772 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15773
15774 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15775 ins_encode %{
15776 __ count_positives($ary1$$Register, $len$$Register,
15777 $result$$Register, $tmp3$$Register,
15778 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15779 %}
15780 ins_pipe( pipe_slow );
15781 %}
15782
15783 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15785 %{
15786 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15787 match(Set result (CountPositives ary1 len));
15788 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15789
15790 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15791 ins_encode %{
15792 __ count_positives($ary1$$Register, $len$$Register,
15793 $result$$Register, $tmp3$$Register,
15794 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15795 %}
15796 ins_pipe( pipe_slow );
15797 %}
15798
15799 // fast char[] to byte[] compression
15800 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15801 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15802 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15803 match(Set result (StrCompressedCopy src (Binary dst len)));
15804 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15805 USE_KILL len, KILL tmp5, KILL cr);
15806
15807 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15808 ins_encode %{
15809 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15810 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15811 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15812 knoreg, knoreg);
15813 %}
15814 ins_pipe( pipe_slow );
15815 %}
15816
15817 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15818 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15819 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15820 match(Set result (StrCompressedCopy src (Binary dst len)));
15821 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15822 USE_KILL len, KILL tmp5, KILL cr);
15823
15824 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15825 ins_encode %{
15826 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15827 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15828 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15829 $ktmp1$$KRegister, $ktmp2$$KRegister);
15830 %}
15831 ins_pipe( pipe_slow );
15832 %}

// fast byte[] to char[] inflation
15834 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15835 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15836 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15837 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15838 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15839
15840 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15841 ins_encode %{
15842 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15843 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15844 %}
15845 ins_pipe( pipe_slow );
15846 %}
15847
15848 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15849 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15850 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15851 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15852 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15853
15854 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15855 ins_encode %{
15856 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15857 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15858 %}
15859 ins_pipe( pipe_slow );
15860 %}
15861
15862 // encode char[] to byte[] in ISO_8859_1
15863 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15864 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15865 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15866 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15867 match(Set result (EncodeISOArray src (Binary dst len)));
15868 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15869
15870 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15871 ins_encode %{
15872 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15873 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15874 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15875 %}
15876 ins_pipe( pipe_slow );
15877 %}
15878
15879 // encode char[] to byte[] in ASCII
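// Same match rule and same helper as encode_iso_array above; only the final
// boolean argument differs. Presumably it selects the compressibility bound
// inside MacroAssembler::encode_iso_array (chars must be < 0x80 for ASCII
// rather than < 0x100 for ISO-8859-1); the is_ascii() predicates keep the
// two rules mutually exclusive.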
15880 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15881 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15882 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15883 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15884 match(Set result (EncodeISOArray src (Binary dst len)));
15885 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15886
15887 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15888 ins_encode %{
15889 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15890 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15891 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15892 %}
15893 ins_pipe( pipe_slow );
15894 %}
15895
15896 //----------Overflow Math Instructions-----------------------------------------
15897
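// These rules define only a flags result (cr); the arithmetic is performed
// purely for its overflow side effect, which is why several forms USE_KILL
// an input register. The typical source is the Math.*Exact intrinsics,
// where C2 emits the operation and then branches on the overflow condition,
// deoptimizing when it is set.
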
15898 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15899 %{
15900 match(Set cr (OverflowAddI op1 op2));
15901 effect(DEF cr, USE_KILL op1, USE op2);
15902
15903 format %{ "addl $op1, $op2\t# overflow check int" %}
15904
15905 ins_encode %{
15906 __ addl($op1$$Register, $op2$$Register);
15907 %}
15908 ins_pipe(ialu_reg_reg);
15909 %}
15910
15911 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15912 %{
15913 match(Set cr (OverflowAddI op1 op2));
15914 effect(DEF cr, USE_KILL op1, USE op2);
15915
15916 format %{ "addl $op1, $op2\t# overflow check int" %}
15917
15918 ins_encode %{
15919 __ addl($op1$$Register, $op2$$constant);
15920 %}
15921 ins_pipe(ialu_reg_reg);
15922 %}
15923
15924 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15925 %{
15926 match(Set cr (OverflowAddL op1 op2));
15927 effect(DEF cr, USE_KILL op1, USE op2);
15928
15929 format %{ "addq $op1, $op2\t# overflow check long" %}
15930 ins_encode %{
15931 __ addq($op1$$Register, $op2$$Register);
15932 %}
15933 ins_pipe(ialu_reg_reg);
15934 %}
15935
15936 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15937 %{
15938 match(Set cr (OverflowAddL op1 op2));
15939 effect(DEF cr, USE_KILL op1, USE op2);
15940
15941 format %{ "addq $op1, $op2\t# overflow check long" %}
15942 ins_encode %{
15943 __ addq($op1$$Register, $op2$$constant);
15944 %}
15945 ins_pipe(ialu_reg_reg);
15946 %}
15947
15948 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15949 %{
15950 match(Set cr (OverflowSubI op1 op2));
15951
15952 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15953 ins_encode %{
15954 __ cmpl($op1$$Register, $op2$$Register);
15955 %}
15956 ins_pipe(ialu_reg_reg);
15957 %}
15958
15959 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15960 %{
15961 match(Set cr (OverflowSubI op1 op2));
15962
15963 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15964 ins_encode %{
15965 __ cmpl($op1$$Register, $op2$$constant);
15966 %}
15967 ins_pipe(ialu_reg_reg);
15968 %}
15969
15970 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15971 %{
15972 match(Set cr (OverflowSubL op1 op2));
15973
15974 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15975 ins_encode %{
15976 __ cmpq($op1$$Register, $op2$$Register);
15977 %}
15978 ins_pipe(ialu_reg_reg);
15979 %}
15980
15981 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15982 %{
15983 match(Set cr (OverflowSubL op1 op2));
15984
15985 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15986 ins_encode %{
15987 __ cmpq($op1$$Register, $op2$$constant);
15988 %}
15989 ins_pipe(ialu_reg_reg);
15990 %}
15991
15992 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15993 %{
15994 match(Set cr (OverflowSubI zero op2));
15995 effect(DEF cr, USE_KILL op2);
15996
15997 format %{ "negl $op2\t# overflow check int" %}
15998 ins_encode %{
15999 __ negl($op2$$Register);
16000 %}
16001 ins_pipe(ialu_reg_reg);
16002 %}
16003
16004 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16005 %{
16006 match(Set cr (OverflowSubL zero op2));
16007 effect(DEF cr, USE_KILL op2);
16008
16009 format %{ "negq $op2\t# overflow check long" %}
16010 ins_encode %{
16011 __ negq($op2$$Register);
16012 %}
16013 ins_pipe(ialu_reg_reg);
16014 %}
16015
16016 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16017 %{
16018 match(Set cr (OverflowMulI op1 op2));
16019 effect(DEF cr, USE_KILL op1, USE op2);
16020
16021 format %{ "imull $op1, $op2\t# overflow check int" %}
16022 ins_encode %{
16023 __ imull($op1$$Register, $op2$$Register);
16024 %}
16025 ins_pipe(ialu_reg_reg_alu0);
16026 %}
16027
16028 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16029 %{
16030 match(Set cr (OverflowMulI op1 op2));
16031 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16032
16033 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16034 ins_encode %{
16035 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16036 %}
16037 ins_pipe(ialu_reg_reg_alu0);
16038 %}
16039
16040 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16041 %{
16042 match(Set cr (OverflowMulL op1 op2));
16043 effect(DEF cr, USE_KILL op1, USE op2);
16044
16045 format %{ "imulq $op1, $op2\t# overflow check long" %}
16046 ins_encode %{
16047 __ imulq($op1$$Register, $op2$$Register);
16048 %}
16049 ins_pipe(ialu_reg_reg_alu0);
16050 %}
16051
16052 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16053 %{
16054 match(Set cr (OverflowMulL op1 op2));
16055 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16056
16057 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16058 ins_encode %{
16059 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16060 %}
16061 ins_pipe(ialu_reg_reg_alu0);
16062 %}
16063
16064
16065 //----------Control Flow Instructions------------------------------------------
16066 // Signed compare Instructions
16067
16068 // XXX more variants!!
16069 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16070 %{
16071 match(Set cr (CmpI op1 op2));
16072 effect(DEF cr, USE op1, USE op2);
16073
16074 format %{ "cmpl $op1, $op2" %}
16075 ins_encode %{
16076 __ cmpl($op1$$Register, $op2$$Register);
16077 %}
16078 ins_pipe(ialu_cr_reg_reg);
16079 %}
16080
16081 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16082 %{
16083 match(Set cr (CmpI op1 op2));
16084
16085 format %{ "cmpl $op1, $op2" %}
16086 ins_encode %{
16087 __ cmpl($op1$$Register, $op2$$constant);
16088 %}
16089 ins_pipe(ialu_cr_reg_imm);
16090 %}
16091
16092 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16093 %{
16094 match(Set cr (CmpI op1 (LoadI op2)));
16095
16096 ins_cost(500); // XXX
16097 format %{ "cmpl $op1, $op2" %}
16098 ins_encode %{
16099 __ cmpl($op1$$Register, $op2$$Address);
16100 %}
16101 ins_pipe(ialu_cr_reg_mem);
16102 %}
16103
16104 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16105 %{
16106 match(Set cr (CmpI src zero));
16107
16108 format %{ "testl $src, $src" %}
16109 ins_encode %{
16110 __ testl($src$$Register, $src$$Register);
16111 %}
16112 ins_pipe(ialu_cr_reg_imm);
16113 %}
16114
16115 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16116 %{
16117 match(Set cr (CmpI (AndI src con) zero));
16118
16119 format %{ "testl $src, $con" %}
16120 ins_encode %{
16121 __ testl($src$$Register, $con$$constant);
16122 %}
16123 ins_pipe(ialu_cr_reg_imm);
16124 %}
16125
16126 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16127 %{
16128 match(Set cr (CmpI (AndI src1 src2) zero));
16129
16130 format %{ "testl $src1, $src2" %}
16131 ins_encode %{
16132 __ testl($src1$$Register, $src2$$Register);
16133 %}
16134 ins_pipe(ialu_cr_reg_imm);
16135 %}
16136
16137 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16138 %{
16139 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16140
16141 format %{ "testl $src, $mem" %}
16142 ins_encode %{
16143 __ testl($src$$Register, $mem$$Address);
16144 %}
16145 ins_pipe(ialu_cr_reg_mem);
16146 %}
16147
16148 // Unsigned compare Instructions; really, same as signed except they
16149 // produce an rFlagsRegU instead of rFlagsReg.
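// Consumers of rFlagsRegU then select the unsigned condition codes
// (jb/jbe/ja/jae) instead of the signed jl/jle/jg/jge.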
16150 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16151 %{
16152 match(Set cr (CmpU op1 op2));
16153
16154 format %{ "cmpl $op1, $op2\t# unsigned" %}
16155 ins_encode %{
16156 __ cmpl($op1$$Register, $op2$$Register);
16157 %}
16158 ins_pipe(ialu_cr_reg_reg);
16159 %}
16160
16161 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16162 %{
16163 match(Set cr (CmpU op1 op2));
16164
16165 format %{ "cmpl $op1, $op2\t# unsigned" %}
16166 ins_encode %{
16167 __ cmpl($op1$$Register, $op2$$constant);
16168 %}
16169 ins_pipe(ialu_cr_reg_imm);
16170 %}
16171
16172 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16173 %{
16174 match(Set cr (CmpU op1 (LoadI op2)));
16175
16176 ins_cost(500); // XXX
16177 format %{ "cmpl $op1, $op2\t# unsigned" %}
16178 ins_encode %{
16179 __ cmpl($op1$$Register, $op2$$Address);
16180 %}
16181 ins_pipe(ialu_cr_reg_mem);
16182 %}
16183
16184 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16185 %{
16186 match(Set cr (CmpU src zero));
16187
16188 format %{ "testl $src, $src\t# unsigned" %}
16189 ins_encode %{
16190 __ testl($src$$Register, $src$$Register);
16191 %}
16192 ins_pipe(ialu_cr_reg_imm);
16193 %}
16194
16195 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16196 %{
16197 match(Set cr (CmpP op1 op2));
16198
16199 format %{ "cmpq $op1, $op2\t# ptr" %}
16200 ins_encode %{
16201 __ cmpq($op1$$Register, $op2$$Register);
16202 %}
16203 ins_pipe(ialu_cr_reg_reg);
16204 %}
16205
16206 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16207 %{
16208 match(Set cr (CmpP op1 (LoadP op2)));
16209 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16210
16211 ins_cost(500); // XXX
16212 format %{ "cmpq $op1, $op2\t# ptr" %}
16213 ins_encode %{
16214 __ cmpq($op1$$Register, $op2$$Address);
16215 %}
16216 ins_pipe(ialu_cr_reg_mem);
16217 %}
16218
16219 // XXX this is generalized by compP_rReg_mem???
16220 // Compare raw pointer (used in out-of-heap check).
16221 // Only works because non-oop pointers must be raw pointers
16222 // and raw pointers have no anti-dependencies.
16223 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16224 %{
16225 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16226 n->in(2)->as_Load()->barrier_data() == 0);
16227 match(Set cr (CmpP op1 (LoadP op2)));
16228
16229 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16230 ins_encode %{
16231 __ cmpq($op1$$Register, $op2$$Address);
16232 %}
16233 ins_pipe(ialu_cr_reg_mem);
16234 %}
16235
16236 // This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16238 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16239 %{
16240 match(Set cr (CmpP src zero));
16241
16242 format %{ "testq $src, $src\t# ptr" %}
16243 ins_encode %{
16244 __ testq($src$$Register, $src$$Register);
16245 %}
16246 ins_pipe(ialu_cr_reg_imm);
16247 %}
16248
16249 // This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16251 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16252 %{
16253 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16254 n->in(1)->as_Load()->barrier_data() == 0);
16255 match(Set cr (CmpP (LoadP op) zero));
16256
16257 ins_cost(500); // XXX
16258 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16259 ins_encode %{
16260 __ testq($op$$Address, 0xFFFFFFFF);
16261 %}
16262 ins_pipe(ialu_cr_reg_imm);
16263 %}
16264
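// When the compressed-oops base is null, r12 (the heap-base register) holds
// zero, so it can stand in as a zero register: cmpq r12, $mem tests the
// loaded pointer against null without an immediate operand.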
16265 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16266 %{
16267 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16268 n->in(1)->as_Load()->barrier_data() == 0);
16269 match(Set cr (CmpP (LoadP mem) zero));
16270
16271 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16272 ins_encode %{
16273 __ cmpq(r12, $mem$$Address);
16274 %}
16275 ins_pipe(ialu_cr_reg_mem);
16276 %}
16277
16278 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16279 %{
16280 match(Set cr (CmpN op1 op2));
16281
16282 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16283 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16284 ins_pipe(ialu_cr_reg_reg);
16285 %}
16286
16287 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16288 %{
16289 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16290 match(Set cr (CmpN src (LoadN mem)));
16291
16292 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16293 ins_encode %{
16294 __ cmpl($src$$Register, $mem$$Address);
16295 %}
16296 ins_pipe(ialu_cr_reg_mem);
16297 %}
16298
16299 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16300 match(Set cr (CmpN op1 op2));
16301
16302 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16303 ins_encode %{
16304 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16305 %}
16306 ins_pipe(ialu_cr_reg_imm);
16307 %}
16308
16309 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16310 %{
16311 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16312 match(Set cr (CmpN src (LoadN mem)));
16313
16314 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16315 ins_encode %{
16316 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16317 %}
16318 ins_pipe(ialu_cr_reg_mem);
16319 %}
16320
16321 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16322 match(Set cr (CmpN op1 op2));
16323
16324 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16325 ins_encode %{
16326 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16327 %}
16328 ins_pipe(ialu_cr_reg_imm);
16329 %}
16330
16331 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16332 %{
16333 predicate(!UseCompactObjectHeaders);
16334 match(Set cr (CmpN src (LoadNKlass mem)));
16335
16336 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16337 ins_encode %{
16338 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16339 %}
16340 ins_pipe(ialu_cr_reg_mem);
16341 %}
16342
16343 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16344 match(Set cr (CmpN src zero));
16345
16346 format %{ "testl $src, $src\t# compressed ptr" %}
16347 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16348 ins_pipe(ialu_cr_reg_imm);
16349 %}
16350
16351 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16352 %{
16353 predicate(CompressedOops::base() != nullptr &&
16354 n->in(1)->as_Load()->barrier_data() == 0);
16355 match(Set cr (CmpN (LoadN mem) zero));
16356
16357 ins_cost(500); // XXX
16358 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16359 ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
16361 %}
16362 ins_pipe(ialu_cr_reg_mem);
16363 %}
16364
16365 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16366 %{
16367 predicate(CompressedOops::base() == nullptr &&
16368 n->in(1)->as_Load()->barrier_data() == 0);
16369 match(Set cr (CmpN (LoadN mem) zero));
16370
16371 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16372 ins_encode %{
16373 __ cmpl(r12, $mem$$Address);
16374 %}
16375 ins_pipe(ialu_cr_reg_mem);
16376 %}
16377
16378 // Yanked all unsigned pointer compare operations.
16379 // Pointer compares are done with CmpP which is already unsigned.
16380
16381 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16382 %{
16383 match(Set cr (CmpL op1 op2));
16384
16385 format %{ "cmpq $op1, $op2" %}
16386 ins_encode %{
16387 __ cmpq($op1$$Register, $op2$$Register);
16388 %}
16389 ins_pipe(ialu_cr_reg_reg);
16390 %}
16391
16392 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16393 %{
16394 match(Set cr (CmpL op1 op2));
16395
16396 format %{ "cmpq $op1, $op2" %}
16397 ins_encode %{
16398 __ cmpq($op1$$Register, $op2$$constant);
16399 %}
16400 ins_pipe(ialu_cr_reg_imm);
16401 %}
16402
16403 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16404 %{
16405 match(Set cr (CmpL op1 (LoadL op2)));
16406
16407 format %{ "cmpq $op1, $op2" %}
16408 ins_encode %{
16409 __ cmpq($op1$$Register, $op2$$Address);
16410 %}
16411 ins_pipe(ialu_cr_reg_mem);
16412 %}
16413
16414 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16415 %{
16416 match(Set cr (CmpL src zero));
16417
16418 format %{ "testq $src, $src" %}
16419 ins_encode %{
16420 __ testq($src$$Register, $src$$Register);
16421 %}
16422 ins_pipe(ialu_cr_reg_imm);
16423 %}
16424
16425 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16426 %{
16427 match(Set cr (CmpL (AndL src con) zero));
16428
16429 format %{ "testq $src, $con\t# long" %}
16430 ins_encode %{
16431 __ testq($src$$Register, $con$$constant);
16432 %}
16433 ins_pipe(ialu_cr_reg_imm);
16434 %}
16435
16436 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16437 %{
16438 match(Set cr (CmpL (AndL src1 src2) zero));
16439
16440 format %{ "testq $src1, $src2\t# long" %}
16441 ins_encode %{
16442 __ testq($src1$$Register, $src2$$Register);
16443 %}
16444 ins_pipe(ialu_cr_reg_imm);
16445 %}
16446
16447 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16448 %{
16449 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16450
16451 format %{ "testq $src, $mem" %}
16452 ins_encode %{
16453 __ testq($src$$Register, $mem$$Address);
16454 %}
16455 ins_pipe(ialu_cr_reg_mem);
16456 %}
16457
16458 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16459 %{
16460 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16461
16462 format %{ "testq $src, $mem" %}
16463 ins_encode %{
16464 __ testq($src$$Register, $mem$$Address);
16465 %}
16466 ins_pipe(ialu_cr_reg_mem);
16467 %}
16468
16469 // Manifest a CmpU result in an integer register. Very painful.
16470 // This is the test to avoid.
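// The shape used by CmpU3/CmpL3/CmpUL3 below computes the usual three-way
// result, roughly:
//   dst = (src1 < src2) ? -1 : (src1 == src2) ? 0 : 1
// Start from -1, keep it when the "below"/"less" branch is taken, and
// otherwise let setcc(notZero) produce 0 for equal and 1 for greater/above.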
16471 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16472 %{
16473 match(Set dst (CmpU3 src1 src2));
16474 effect(KILL flags);
16475
16476 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16482 ins_encode %{
16483 Label done;
16484 __ cmpl($src1$$Register, $src2$$Register);
16485 __ movl($dst$$Register, -1);
16486 __ jccb(Assembler::below, done);
16487 __ setcc(Assembler::notZero, $dst$$Register);
16488 __ bind(done);
16489 %}
16490 ins_pipe(pipe_slow);
16491 %}
16492
16493 // Manifest a CmpL result in an integer register. Very painful.
16494 // This is the test to avoid.
16495 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16496 %{
16497 match(Set dst (CmpL3 src1 src2));
16498 effect(KILL flags);
16499
16500 ins_cost(275); // XXX
16501 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16502 "movl $dst, -1\n\t"
16503 "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16505 "done:" %}
16506 ins_encode %{
16507 Label done;
16508 __ cmpq($src1$$Register, $src2$$Register);
16509 __ movl($dst$$Register, -1);
16510 __ jccb(Assembler::less, done);
16511 __ setcc(Assembler::notZero, $dst$$Register);
16512 __ bind(done);
16513 %}
16514 ins_pipe(pipe_slow);
16515 %}
16516
16517 // Manifest a CmpUL result in an integer register. Very painful.
16518 // This is the test to avoid.
16519 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16520 %{
16521 match(Set dst (CmpUL3 src1 src2));
16522 effect(KILL flags);
16523
16524 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16530 ins_encode %{
16531 Label done;
16532 __ cmpq($src1$$Register, $src2$$Register);
16533 __ movl($dst$$Register, -1);
16534 __ jccb(Assembler::below, done);
16535 __ setcc(Assembler::notZero, $dst$$Register);
16536 __ bind(done);
16537 %}
16538 ins_pipe(pipe_slow);
16539 %}
16540
16541 // Unsigned long compare Instructions; really, same as signed long except they
16542 // produce an rFlagsRegU instead of rFlagsReg.
16543 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16544 %{
16545 match(Set cr (CmpUL op1 op2));
16546
16547 format %{ "cmpq $op1, $op2\t# unsigned" %}
16548 ins_encode %{
16549 __ cmpq($op1$$Register, $op2$$Register);
16550 %}
16551 ins_pipe(ialu_cr_reg_reg);
16552 %}
16553
16554 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16555 %{
16556 match(Set cr (CmpUL op1 op2));
16557
16558 format %{ "cmpq $op1, $op2\t# unsigned" %}
16559 ins_encode %{
16560 __ cmpq($op1$$Register, $op2$$constant);
16561 %}
16562 ins_pipe(ialu_cr_reg_imm);
16563 %}
16564
16565 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16566 %{
16567 match(Set cr (CmpUL op1 (LoadL op2)));
16568
16569 format %{ "cmpq $op1, $op2\t# unsigned" %}
16570 ins_encode %{
16571 __ cmpq($op1$$Register, $op2$$Address);
16572 %}
16573 ins_pipe(ialu_cr_reg_mem);
16574 %}
16575
16576 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16577 %{
16578 match(Set cr (CmpUL src zero));
16579
16580 format %{ "testq $src, $src\t# unsigned" %}
16581 ins_encode %{
16582 __ testq($src$$Register, $src$$Register);
16583 %}
16584 ins_pipe(ialu_cr_reg_imm);
16585 %}
16586
16587 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16588 %{
16589 match(Set cr (CmpI (LoadB mem) imm));
16590
16591 ins_cost(125);
16592 format %{ "cmpb $mem, $imm" %}
16593 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16594 ins_pipe(ialu_cr_reg_mem);
16595 %}
16596
16597 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16598 %{
16599 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16600
16601 ins_cost(125);
16602 format %{ "testb $mem, $imm\t# ubyte" %}
16603 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16604 ins_pipe(ialu_cr_reg_mem);
16605 %}
16606
16607 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16608 %{
16609 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16610
16611 ins_cost(125);
16612 format %{ "testb $mem, $imm\t# byte" %}
16613 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16614 ins_pipe(ialu_cr_reg_mem);
16615 %}
16616
16617 //----------Max and Min--------------------------------------------------------
16618 // Min Instructions
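// MinI/MaxI are not matched directly; they expand into a compare plus a
// conditional move, keeping the selection branchless. For min the cmov
// condition is "greater": dst is replaced by src only when dst > src, so
// the smaller value survives either way. The *_ndd variants use the APX
// non-destructive ecmovl encoding when UseAPX is set.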
16619
16620 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16621 %{
16622 predicate(!UseAPX);
16623 effect(USE_DEF dst, USE src, USE cr);
16624
16625 format %{ "cmovlgt $dst, $src\t# min" %}
16626 ins_encode %{
16627 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16628 %}
16629 ins_pipe(pipe_cmov_reg);
16630 %}
16631
16632 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16633 %{
16634 predicate(UseAPX);
16635 effect(DEF dst, USE src1, USE src2, USE cr);
16636
16637 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16638 ins_encode %{
16639 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16640 %}
16641 ins_pipe(pipe_cmov_reg);
16642 %}
16643
16644 instruct minI_rReg(rRegI dst, rRegI src)
16645 %{
16646 predicate(!UseAPX);
16647 match(Set dst (MinI dst src));
16648
16649 ins_cost(200);
16650 expand %{
16651 rFlagsReg cr;
16652 compI_rReg(cr, dst, src);
16653 cmovI_reg_g(dst, src, cr);
16654 %}
16655 %}
16656
16657 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16658 %{
16659 predicate(UseAPX);
16660 match(Set dst (MinI src1 src2));
16661 effect(DEF dst, USE src1, USE src2);
16662 flag(PD::Flag_ndd_demotable_opr1);
16663
16664 ins_cost(200);
16665 expand %{
16666 rFlagsReg cr;
16667 compI_rReg(cr, src1, src2);
16668 cmovI_reg_g_ndd(dst, src1, src2, cr);
16669 %}
16670 %}
16671
16672 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16673 %{
16674 predicate(!UseAPX);
16675 effect(USE_DEF dst, USE src, USE cr);
16676
16677 format %{ "cmovllt $dst, $src\t# max" %}
16678 ins_encode %{
16679 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16680 %}
16681 ins_pipe(pipe_cmov_reg);
16682 %}
16683
16684 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16685 %{
16686 predicate(UseAPX);
16687 effect(DEF dst, USE src1, USE src2, USE cr);
16688
16689 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16690 ins_encode %{
16691 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16692 %}
16693 ins_pipe(pipe_cmov_reg);
16694 %}
16695
16696 instruct maxI_rReg(rRegI dst, rRegI src)
16697 %{
16698 predicate(!UseAPX);
16699 match(Set dst (MaxI dst src));
16700
16701 ins_cost(200);
16702 expand %{
16703 rFlagsReg cr;
16704 compI_rReg(cr, dst, src);
16705 cmovI_reg_l(dst, src, cr);
16706 %}
16707 %}
16708
16709 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16710 %{
16711 predicate(UseAPX);
16712 match(Set dst (MaxI src1 src2));
16713 effect(DEF dst, USE src1, USE src2);
16714 flag(PD::Flag_ndd_demotable_opr1);
16715
16716 ins_cost(200);
16717 expand %{
16718 rFlagsReg cr;
16719 compI_rReg(cr, src1, src2);
16720 cmovI_reg_l_ndd(dst, src1, src2, cr);
16721 %}
16722 %}
16723
16724 // ============================================================================
16725 // Branch Instructions
16726
16727 // Jump Direct - Label defines a relative address from JMP+1
16728 instruct jmpDir(label labl)
16729 %{
16730 match(Goto);
16731 effect(USE labl);
16732
16733 ins_cost(300);
16734 format %{ "jmp $labl" %}
16735 size(5);
16736 ins_encode %{
16737 Label* L = $labl$$label;
16738 __ jmp(*L, false); // Always long jump
16739 %}
16740 ins_pipe(pipe_jmp);
16741 %}
16742
16743 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16744 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16745 %{
16746 match(If cop cr);
16747 effect(USE labl);
16748
16749 ins_cost(300);
16750 format %{ "j$cop $labl" %}
16751 size(6);
16752 ins_encode %{
16753 Label* L = $labl$$label;
16754 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16755 %}
16756 ins_pipe(pipe_jcc);
16757 %}
16758
16759 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16760 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16761 %{
16762 match(CountedLoopEnd cop cr);
16763 effect(USE labl);
16764
16765 ins_cost(300);
16766 format %{ "j$cop $labl\t# loop end" %}
16767 size(6);
16768 ins_encode %{
16769 Label* L = $labl$$label;
16770 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16771 %}
16772 ins_pipe(pipe_jcc);
16773 %}
16774
16775 // Jump Direct Conditional - using unsigned comparison
16776 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16777 match(If cop cmp);
16778 effect(USE labl);
16779
16780 ins_cost(300);
16781 format %{ "j$cop,u $labl" %}
16782 size(6);
16783 ins_encode %{
16784 Label* L = $labl$$label;
16785 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16786 %}
16787 ins_pipe(pipe_jcc);
16788 %}
16789
16790 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16791 match(If cop cmp);
16792 effect(USE labl);
16793
16794 ins_cost(200);
16795 format %{ "j$cop,u $labl" %}
16796 size(6);
16797 ins_encode %{
16798 Label* L = $labl$$label;
16799 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16800 %}
16801 ins_pipe(pipe_jcc);
16802 %}
16803
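// jmpConUCF2 branches on eq/ne after a floating-point compare whose result
// may be unordered (NaN), which the hardware reports via the parity flag.
// For != the branch must also be taken when unordered, hence the extra jp
// to the target; for == it must not be, so the jp skips over the je via a
// local done label.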
16804 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16805 match(If cop cmp);
16806 effect(USE labl);
16807
16808 ins_cost(200);
16809 format %{ $$template
16810 if ($cop$$cmpcode == Assembler::notEqual) {
16811 $$emit$$"jp,u $labl\n\t"
16812 $$emit$$"j$cop,u $labl"
16813 } else {
16814 $$emit$$"jp,u done\n\t"
16815 $$emit$$"j$cop,u $labl\n\t"
16816 $$emit$$"done:"
16817 }
16818 %}
16819 ins_encode %{
16820 Label* l = $labl$$label;
16821 if ($cop$$cmpcode == Assembler::notEqual) {
16822 __ jcc(Assembler::parity, *l, false);
16823 __ jcc(Assembler::notEqual, *l, false);
16824 } else if ($cop$$cmpcode == Assembler::equal) {
16825 Label done;
16826 __ jccb(Assembler::parity, done);
16827 __ jcc(Assembler::equal, *l, false);
16828 __ bind(done);
16829 } else {
16830 ShouldNotReachHere();
16831 }
16832 %}
16833 ins_pipe(pipe_jcc);
16834 %}
16835
16836 // ============================================================================
// The second (slow) half of a subtype check. Scan the subklass's secondary
16838 // superklass array for an instance of the superklass. Set a hidden
16839 // internal cache on a hit (cache is checked with exposed code in
16840 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16841 // encoding ALSO sets flags.
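//
// A C++-style sketch of what the repne-scasq scan amounts to (illustrative
// only, not actual VM code):
//
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super) {
//       sub->set_secondary_super_cache(super);  // hit: update the cache
//       return 0;                               // and report zero
//     }
//   }
//   return nonzero;                             // miss; flags left NZ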
16842
16843 instruct partialSubtypeCheck(rdi_RegP result,
16844 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16845 rFlagsReg cr)
16846 %{
16847 match(Set result (PartialSubtypeCheck sub super));
16848 predicate(!UseSecondarySupersTable);
16849 effect(KILL rcx, KILL cr);
16850
16851 ins_cost(1100); // slightly larger than the next version
16852 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16853 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16854 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16855 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16856 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16857 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t# Hit: rdi zero\n\t"
16859 "miss:\t" %}
16860
16861 ins_encode %{
16862 Label miss;
16863 // NB: Callers may assume that, when $result is a valid register,
16864 // check_klass_subtype_slow_path_linear sets it to a nonzero
16865 // value.
16866 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16867 $rcx$$Register, $result$$Register,
16868 nullptr, &miss,
16869 /*set_cond_codes:*/ true);
16870 __ xorptr($result$$Register, $result$$Register);
16871 __ bind(miss);
16872 %}
16873
16874 ins_pipe(pipe_slow);
16875 %}
16876
16877 // ============================================================================
16878 // Two versions of hashtable-based partialSubtypeCheck, both used when
16879 // we need to search for a super class in the secondary supers array.
16880 // The first is used when we don't know _a priori_ the class being
16881 // searched for. The second, far more common, is used when we do know:
16882 // this is used for instanceof, checkcast, and any case where C2 can
16883 // determine it by constant propagation.
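// In the constant case the superclass's hash slot is computed at compile
// time from the Klass* constant (see hash_slot() below), and when
// InlineSecondarySupersTest is disabled the whole lookup collapses into a
// single call to a per-slot stub from StubRoutines.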
16884
16885 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16886 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16887 rFlagsReg cr)
16888 %{
16889 match(Set result (PartialSubtypeCheck sub super));
16890 predicate(UseSecondarySupersTable);
16891 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16892
16893 ins_cost(1000);
16894 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16895
16896 ins_encode %{
16897 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16898 $temp3$$Register, $temp4$$Register, $result$$Register);
16899 %}
16900
16901 ins_pipe(pipe_slow);
16902 %}
16903
16904 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16905 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16906 rFlagsReg cr)
16907 %{
16908 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16909 predicate(UseSecondarySupersTable);
16910 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16911
  ins_cost(700); // smaller than the variable-super version above
16913 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16914
16915 ins_encode %{
16916 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16917 if (InlineSecondarySupersTest) {
16918 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16919 $temp3$$Register, $temp4$$Register, $result$$Register,
16920 super_klass_slot);
16921 } else {
16922 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16923 }
16924 %}
16925
16926 ins_pipe(pipe_slow);
16927 %}
16928
16929 // ============================================================================
16930 // Branch Instructions -- short offset versions
16931 //
16932 // These instructions are used to replace jumps of a long offset (the default
16933 // match) with jumps of a shorter offset. These instructions are all tagged
16934 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16935 // match rules in general matching. Instead, the ADLC generates a conversion
16936 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. Whether a branch is close enough for
// the short form is decided by the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
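//
// The fixed size() attributes quantify the saving: jmp rel32 is 5 bytes and
// jcc rel32 is 6, while the rel8 forms below are 2 bytes each (4 for the
// two-branch jmpConUCF2_short sequence).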
16940
16941 // Jump Direct - Label defines a relative address from JMP+1
16942 instruct jmpDir_short(label labl) %{
16943 match(Goto);
16944 effect(USE labl);
16945
16946 ins_cost(300);
16947 format %{ "jmp,s $labl" %}
16948 size(2);
16949 ins_encode %{
16950 Label* L = $labl$$label;
16951 __ jmpb(*L);
16952 %}
16953 ins_pipe(pipe_jmp);
16954 ins_short_branch(1);
16955 %}
16956
16957 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16958 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16959 match(If cop cr);
16960 effect(USE labl);
16961
16962 ins_cost(300);
16963 format %{ "j$cop,s $labl" %}
16964 size(2);
16965 ins_encode %{
16966 Label* L = $labl$$label;
16967 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16968 %}
16969 ins_pipe(pipe_jcc);
16970 ins_short_branch(1);
16971 %}
16972
16973 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16974 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16975 match(CountedLoopEnd cop cr);
16976 effect(USE labl);
16977
16978 ins_cost(300);
16979 format %{ "j$cop,s $labl\t# loop end" %}
16980 size(2);
16981 ins_encode %{
16982 Label* L = $labl$$label;
16983 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16984 %}
16985 ins_pipe(pipe_jcc);
16986 ins_short_branch(1);
16987 %}
16988
16989 // Jump Direct Conditional - using unsigned comparison
16990 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16991 match(If cop cmp);
16992 effect(USE labl);
16993
16994 ins_cost(300);
16995 format %{ "j$cop,us $labl" %}
16996 size(2);
16997 ins_encode %{
16998 Label* L = $labl$$label;
16999 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17000 %}
17001 ins_pipe(pipe_jcc);
17002 ins_short_branch(1);
17003 %}
17004
17005 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17006 match(If cop cmp);
17007 effect(USE labl);
17008
17009 ins_cost(300);
17010 format %{ "j$cop,us $labl" %}
17011 size(2);
17012 ins_encode %{
17013 Label* L = $labl$$label;
17014 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17015 %}
17016 ins_pipe(pipe_jcc);
17017 ins_short_branch(1);
17018 %}
17019
17020 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17021 match(If cop cmp);
17022 effect(USE labl);
17023
17024 ins_cost(300);
17025 format %{ $$template
17026 if ($cop$$cmpcode == Assembler::notEqual) {
17027 $$emit$$"jp,u,s $labl\n\t"
17028 $$emit$$"j$cop,u,s $labl"
17029 } else {
17030 $$emit$$"jp,u,s done\n\t"
17031 $$emit$$"j$cop,u,s $labl\n\t"
17032 $$emit$$"done:"
17033 }
17034 %}
17035 size(4);
17036 ins_encode %{
17037 Label* l = $labl$$label;
17038 if ($cop$$cmpcode == Assembler::notEqual) {
17039 __ jccb(Assembler::parity, *l);
17040 __ jccb(Assembler::notEqual, *l);
17041 } else if ($cop$$cmpcode == Assembler::equal) {
17042 Label done;
17043 __ jccb(Assembler::parity, done);
17044 __ jccb(Assembler::equal, *l);
17045 __ bind(done);
17046 } else {
17047 ShouldNotReachHere();
17048 }
17049 %}
17050 ins_pipe(pipe_jcc);
17051 ins_short_branch(1);
17052 %}
17053
17054 // ============================================================================
17055 // inlined locking and unlocking
17056
17057 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17058 match(Set cr (FastLock object box));
17059 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17060 ins_cost(300);
17061 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17062 ins_encode %{
17063 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17064 %}
17065 ins_pipe(pipe_slow);
17066 %}
17067
17068 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17069 match(Set cr (FastUnlock object rax_reg));
17070 effect(TEMP tmp, USE_KILL rax_reg);
17071 ins_cost(300);
17072 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17073 ins_encode %{
17074 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17075 %}
17076 ins_pipe(pipe_slow);
17077 %}
17078
17079
17080 // ============================================================================
17081 // Safepoint Instructions
17082 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17083 %{
17084 match(SafePoint poll);
17085 effect(KILL cr, USE poll);
17086
17087 format %{ "testl rax, [$poll]\t"
17088 "# Safepoint: poll for GC" %}
17089 ins_cost(125);
17090 ins_encode %{
17091 __ relocate(relocInfo::poll_type);
17092 address pre_pc = __ pc();
17093 __ testl(rax, Address($poll$$Register, 0));
17094 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17095 %}
17096 ins_pipe(ialu_reg_mem);
17097 %}
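
// The poll above is an ordinary load from the thread-local polling page. A
// sketch of the mechanism (assuming the usual HotSpot handshake scheme):
//
//   testl rax, [poll]    ; page readable  -> falls through, no side effects
//                        ; page protected -> SIGSEGV, the VM runs the
//                        ;                   safepoint handler for this thread
//
// Arming a safepoint therefore costs one page-protection change, not code
// patching at every poll site.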
17098
17099 instruct mask_all_evexL(kReg dst, rRegL src) %{
17100 match(Set dst (MaskAll src));
17101 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17102 ins_encode %{
17103 int mask_len = Matcher::vector_length(this);
17104 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17105 %}
17106 ins_pipe( pipe_slow );
17107 %}
17108
17109 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17110 predicate(Matcher::vector_length(n) > 32);
17111 match(Set dst (MaskAll src));
17112 effect(TEMP tmp);
17113 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17114 ins_encode %{
17115 int mask_len = Matcher::vector_length(this);
17116 __ movslq($tmp$$Register, $src$$Register);
17117 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17118 %}
17119 ins_pipe( pipe_slow );
17120 %}
17121
17122 // ============================================================================
17123 // Procedure Call/Return Instructions
17124 // Call Java Static Instruction
17125 // Note: If this code changes, the corresponding ret_addr_offset() and
17126 // compute_padding() functions will have to be adjusted.
17127 instruct CallStaticJavaDirect(method meth) %{
17128 match(CallStaticJava);
17129 effect(USE meth);
17130
17131 ins_cost(300);
17132 format %{ "call,static " %}
17133 opcode(0xE8); /* E8 cd */
17134 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17135 ins_pipe(pipe_slow);
17136 ins_alignment(4);
17137 %}
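
// A note on ins_alignment(4): the 4-byte displacement of the E8 call is
// patched when the static call is resolved, and patching must be MT-safe.
// A sketch of the layout this enforces (assuming atomic aligned 4-byte
// stores are used for call-site patching):
//
//   ... nop padding ... | E8 | dd dd dd dd    ; displacement 4-byte aligned
//
// compute_padding() inserts the nops needed to establish this.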
17138
17139 // Call Java Dynamic Instruction
17140 // Note: If this code changes, the corresponding ret_addr_offset() and
17141 // compute_padding() functions will have to be adjusted.
17142 instruct CallDynamicJavaDirect(method meth)
17143 %{
17144 match(CallDynamicJava);
17145 effect(USE meth);
17146
17147 ins_cost(300);
17148 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17149 "call,dynamic " %}
17150 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17151 ins_pipe(pipe_slow);
17152 ins_alignment(4);
17153 %}
17154
17155 // Call Runtime Instruction
17156 instruct CallRuntimeDirect(method meth)
17157 %{
17158 match(CallRuntime);
17159 effect(USE meth);
17160
17161 ins_cost(300);
17162 format %{ "call,runtime " %}
17163 ins_encode(clear_avx, Java_To_Runtime(meth));
17164 ins_pipe(pipe_slow);
17165 %}
17166
17167 // Call runtime without safepoint
17168 instruct CallLeafDirect(method meth)
17169 %{
17170 match(CallLeaf);
17171 effect(USE meth);
17172
17173 ins_cost(300);
17174 format %{ "call_leaf,runtime " %}
17175 ins_encode(clear_avx, Java_To_Runtime(meth));
17176 ins_pipe(pipe_slow);
17177 %}
17178
17179 // Call runtime without safepoint and with vector arguments
17180 instruct CallLeafDirectVector(method meth)
17181 %{
17182 match(CallLeafVector);
17183 effect(USE meth);
17184
17185 ins_cost(300);
17186 format %{ "call_leaf,vector " %}
17187 ins_encode(Java_To_Runtime(meth));
17188 ins_pipe(pipe_slow);
17189 %}
17190
17191 // Call runtime without safepoint
17192 instruct CallLeafNoFPDirect(method meth)
17193 %{
17194 match(CallLeafNoFP);
17195 effect(USE meth);
17196
17197 ins_cost(300);
17198 format %{ "call_leaf_nofp,runtime " %}
17199 ins_encode(clear_avx, Java_To_Runtime(meth));
17200 ins_pipe(pipe_slow);
17201 %}
17202
17203 // Return Instruction
17204 // Remove the return address & jump to it.
17205 // Notice: We always emit a nop after a ret to make sure there is room
17206 // for safepoint patching
17207 instruct Ret()
17208 %{
17209 match(Return);
17210
17211 format %{ "ret" %}
17212 ins_encode %{
17213 __ ret(0);
17214 %}
17215 ins_pipe(pipe_jmp);
17216 %}
17217
17218 // Tail Call; Jump from runtime stub to Java code.
17219 // Also known as an 'interprocedural jump'.
17220 // Target of jump will eventually return to caller.
17221 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller's state.
17224 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17225 %{
17226 match(TailCall jump_target method_ptr);
17227
17228 ins_cost(300);
17229 format %{ "jmp $jump_target\t# rbx holds method" %}
17230 ins_encode %{
17231 __ jmp($jump_target$$Register);
17232 %}
17233 ins_pipe(pipe_jmp);
17234 %}
17235
17236 // Tail Jump; remove the return address; jump to target.
17237 // TailCall above leaves the return address around.
17238 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17239 %{
17240 match(TailJump jump_target ex_oop);
17241
17242 ins_cost(300);
17243 format %{ "popq rdx\t# pop return address\n\t"
17244 "jmp $jump_target" %}
17245 ins_encode %{
17246 __ popq(as_Register(RDX_enc));
17247 __ jmp($jump_target$$Register);
17248 %}
17249 ins_pipe(pipe_jmp);
17250 %}
17251
17252 // Forward exception.
17253 instruct ForwardExceptionjmp()
17254 %{
17255 match(ForwardException);
17256
17257 format %{ "jmp forward_exception_stub" %}
17258 ins_encode %{
17259 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17260 %}
17261 ins_pipe(pipe_jmp);
17262 %}
17263
// Create exception oop: created by stack-crawling runtime code.
// The created exception is available to this handler and is set up
// just prior to jumping to this handler. No code is emitted.
17267 instruct CreateException(rax_RegP ex_oop)
17268 %{
17269 match(Set ex_oop (CreateEx));
17270
17271 size(0);
17273 format %{ "# exception oop is in rax; no code emitted" %}
17274 ins_encode();
17275 ins_pipe(empty);
17276 %}
17277
17278 // Rethrow exception:
17279 // The exception oop will come in the first argument position.
17280 // Then JUMP (not call) to the rethrow stub code.
17281 instruct RethrowException()
17282 %{
17283 match(Rethrow);
17284
17286 format %{ "jmp rethrow_stub" %}
17287 ins_encode %{
17288 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17289 %}
17290 ins_pipe(pipe_jmp);
17291 %}
17292
17293 // ============================================================================
17294 // This name is KNOWN by the ADLC and cannot be changed.
17295 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17297 instruct tlsLoadP(r15_RegP dst) %{
17298 match(Set dst (ThreadLocal));
17299 effect(DEF dst);
17300
17301 size(0);
17302 format %{ "# TLS is in R15" %}
17303 ins_encode( /*empty encoding*/ );
17304 ins_pipe(ialu_reg_reg);
17305 %}
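
// r15 is reserved for the current JavaThread on x86_64, which is why the rule
// above is zero-size: thread state is simply addressed as [r15 + offset],
// e.g. when loading the polling-page pointer consumed by safePoint_poll_tls.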
17306
17307 instruct addF_reg(regF dst, regF src) %{
17308 predicate(UseAVX == 0);
17309 match(Set dst (AddF dst src));
17310
17311 format %{ "addss $dst, $src" %}
17312 ins_cost(150);
17313 ins_encode %{
17314 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17315 %}
17316 ins_pipe(pipe_slow);
17317 %}
17318
17319 instruct addF_mem(regF dst, memory src) %{
17320 predicate(UseAVX == 0);
17321 match(Set dst (AddF dst (LoadF src)));
17322
17323 format %{ "addss $dst, $src" %}
17324 ins_cost(150);
17325 ins_encode %{
17326 __ addss($dst$$XMMRegister, $src$$Address);
17327 %}
17328 ins_pipe(pipe_slow);
17329 %}
17330
17331 instruct addF_imm(regF dst, immF con) %{
17332 predicate(UseAVX == 0);
17333 match(Set dst (AddF dst con));
17334 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17335 ins_cost(150);
17336 ins_encode %{
17337 __ addss($dst$$XMMRegister, $constantaddress($con));
17338 %}
17339 ins_pipe(pipe_slow);
17340 %}
17341
17342 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17343 predicate(UseAVX > 0);
17344 match(Set dst (AddF src1 src2));
17345
17346 format %{ "vaddss $dst, $src1, $src2" %}
17347 ins_cost(150);
17348 ins_encode %{
17349 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17350 %}
17351 ins_pipe(pipe_slow);
17352 %}
17353
17354 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17355 predicate(UseAVX > 0);
17356 match(Set dst (AddF src1 (LoadF src2)));
17357
17358 format %{ "vaddss $dst, $src1, $src2" %}
17359 ins_cost(150);
17360 ins_encode %{
17361 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17362 %}
17363 ins_pipe(pipe_slow);
17364 %}
17365
17366 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17367 predicate(UseAVX > 0);
17368 match(Set dst (AddF src con));
17369
17370 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17371 ins_cost(150);
17372 ins_encode %{
17373 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17374 %}
17375 ins_pipe(pipe_slow);
17376 %}
17377
17378 instruct addD_reg(regD dst, regD src) %{
17379 predicate(UseAVX == 0);
17380 match(Set dst (AddD dst src));
17381
17382 format %{ "addsd $dst, $src" %}
17383 ins_cost(150);
17384 ins_encode %{
17385 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17386 %}
17387 ins_pipe(pipe_slow);
17388 %}
17389
17390 instruct addD_mem(regD dst, memory src) %{
17391 predicate(UseAVX == 0);
17392 match(Set dst (AddD dst (LoadD src)));
17393
17394 format %{ "addsd $dst, $src" %}
17395 ins_cost(150);
17396 ins_encode %{
17397 __ addsd($dst$$XMMRegister, $src$$Address);
17398 %}
17399 ins_pipe(pipe_slow);
17400 %}
17401
17402 instruct addD_imm(regD dst, immD con) %{
17403 predicate(UseAVX == 0);
17404 match(Set dst (AddD dst con));
17405 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17406 ins_cost(150);
17407 ins_encode %{
17408 __ addsd($dst$$XMMRegister, $constantaddress($con));
17409 %}
17410 ins_pipe(pipe_slow);
17411 %}
17412
17413 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17414 predicate(UseAVX > 0);
17415 match(Set dst (AddD src1 src2));
17416
17417 format %{ "vaddsd $dst, $src1, $src2" %}
17418 ins_cost(150);
17419 ins_encode %{
17420 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17421 %}
17422 ins_pipe(pipe_slow);
17423 %}
17424
17425 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17426 predicate(UseAVX > 0);
17427 match(Set dst (AddD src1 (LoadD src2)));
17428
17429 format %{ "vaddsd $dst, $src1, $src2" %}
17430 ins_cost(150);
17431 ins_encode %{
17432 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17433 %}
17434 ins_pipe(pipe_slow);
17435 %}
17436
17437 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17438 predicate(UseAVX > 0);
17439 match(Set dst (AddD src con));
17440
17441 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17442 ins_cost(150);
17443 ins_encode %{
17444 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17445 %}
17446 ins_pipe(pipe_slow);
17447 %}
17448
17449 instruct subF_reg(regF dst, regF src) %{
17450 predicate(UseAVX == 0);
17451 match(Set dst (SubF dst src));
17452
17453 format %{ "subss $dst, $src" %}
17454 ins_cost(150);
17455 ins_encode %{
17456 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17457 %}
17458 ins_pipe(pipe_slow);
17459 %}
17460
17461 instruct subF_mem(regF dst, memory src) %{
17462 predicate(UseAVX == 0);
17463 match(Set dst (SubF dst (LoadF src)));
17464
17465 format %{ "subss $dst, $src" %}
17466 ins_cost(150);
17467 ins_encode %{
17468 __ subss($dst$$XMMRegister, $src$$Address);
17469 %}
17470 ins_pipe(pipe_slow);
17471 %}
17472
17473 instruct subF_imm(regF dst, immF con) %{
17474 predicate(UseAVX == 0);
17475 match(Set dst (SubF dst con));
17476 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17477 ins_cost(150);
17478 ins_encode %{
17479 __ subss($dst$$XMMRegister, $constantaddress($con));
17480 %}
17481 ins_pipe(pipe_slow);
17482 %}
17483
17484 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17485 predicate(UseAVX > 0);
17486 match(Set dst (SubF src1 src2));
17487
17488 format %{ "vsubss $dst, $src1, $src2" %}
17489 ins_cost(150);
17490 ins_encode %{
17491 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17492 %}
17493 ins_pipe(pipe_slow);
17494 %}
17495
17496 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17497 predicate(UseAVX > 0);
17498 match(Set dst (SubF src1 (LoadF src2)));
17499
17500 format %{ "vsubss $dst, $src1, $src2" %}
17501 ins_cost(150);
17502 ins_encode %{
17503 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17504 %}
17505 ins_pipe(pipe_slow);
17506 %}
17507
17508 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17509 predicate(UseAVX > 0);
17510 match(Set dst (SubF src con));
17511
17512 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17513 ins_cost(150);
17514 ins_encode %{
17515 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17516 %}
17517 ins_pipe(pipe_slow);
17518 %}
17519
17520 instruct subD_reg(regD dst, regD src) %{
17521 predicate(UseAVX == 0);
17522 match(Set dst (SubD dst src));
17523
17524 format %{ "subsd $dst, $src" %}
17525 ins_cost(150);
17526 ins_encode %{
17527 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17528 %}
17529 ins_pipe(pipe_slow);
17530 %}
17531
17532 instruct subD_mem(regD dst, memory src) %{
17533 predicate(UseAVX == 0);
17534 match(Set dst (SubD dst (LoadD src)));
17535
17536 format %{ "subsd $dst, $src" %}
17537 ins_cost(150);
17538 ins_encode %{
17539 __ subsd($dst$$XMMRegister, $src$$Address);
17540 %}
17541 ins_pipe(pipe_slow);
17542 %}
17543
17544 instruct subD_imm(regD dst, immD con) %{
17545 predicate(UseAVX == 0);
17546 match(Set dst (SubD dst con));
17547 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17548 ins_cost(150);
17549 ins_encode %{
17550 __ subsd($dst$$XMMRegister, $constantaddress($con));
17551 %}
17552 ins_pipe(pipe_slow);
17553 %}
17554
17555 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17556 predicate(UseAVX > 0);
17557 match(Set dst (SubD src1 src2));
17558
17559 format %{ "vsubsd $dst, $src1, $src2" %}
17560 ins_cost(150);
17561 ins_encode %{
17562 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17563 %}
17564 ins_pipe(pipe_slow);
17565 %}
17566
17567 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17568 predicate(UseAVX > 0);
17569 match(Set dst (SubD src1 (LoadD src2)));
17570
17571 format %{ "vsubsd $dst, $src1, $src2" %}
17572 ins_cost(150);
17573 ins_encode %{
17574 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17575 %}
17576 ins_pipe(pipe_slow);
17577 %}
17578
17579 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17580 predicate(UseAVX > 0);
17581 match(Set dst (SubD src con));
17582
17583 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17584 ins_cost(150);
17585 ins_encode %{
17586 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17587 %}
17588 ins_pipe(pipe_slow);
17589 %}
17590
17591 instruct mulF_reg(regF dst, regF src) %{
17592 predicate(UseAVX == 0);
17593 match(Set dst (MulF dst src));
17594
17595 format %{ "mulss $dst, $src" %}
17596 ins_cost(150);
17597 ins_encode %{
17598 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17599 %}
17600 ins_pipe(pipe_slow);
17601 %}
17602
17603 instruct mulF_mem(regF dst, memory src) %{
17604 predicate(UseAVX == 0);
17605 match(Set dst (MulF dst (LoadF src)));
17606
17607 format %{ "mulss $dst, $src" %}
17608 ins_cost(150);
17609 ins_encode %{
17610 __ mulss($dst$$XMMRegister, $src$$Address);
17611 %}
17612 ins_pipe(pipe_slow);
17613 %}
17614
17615 instruct mulF_imm(regF dst, immF con) %{
17616 predicate(UseAVX == 0);
17617 match(Set dst (MulF dst con));
17618 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17619 ins_cost(150);
17620 ins_encode %{
17621 __ mulss($dst$$XMMRegister, $constantaddress($con));
17622 %}
17623 ins_pipe(pipe_slow);
17624 %}
17625
17626 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17627 predicate(UseAVX > 0);
17628 match(Set dst (MulF src1 src2));
17629
17630 format %{ "vmulss $dst, $src1, $src2" %}
17631 ins_cost(150);
17632 ins_encode %{
17633 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17634 %}
17635 ins_pipe(pipe_slow);
17636 %}
17637
17638 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17639 predicate(UseAVX > 0);
17640 match(Set dst (MulF src1 (LoadF src2)));
17641
17642 format %{ "vmulss $dst, $src1, $src2" %}
17643 ins_cost(150);
17644 ins_encode %{
17645 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17646 %}
17647 ins_pipe(pipe_slow);
17648 %}
17649
17650 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17651 predicate(UseAVX > 0);
17652 match(Set dst (MulF src con));
17653
17654 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17655 ins_cost(150);
17656 ins_encode %{
17657 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17658 %}
17659 ins_pipe(pipe_slow);
17660 %}
17661
17662 instruct mulD_reg(regD dst, regD src) %{
17663 predicate(UseAVX == 0);
17664 match(Set dst (MulD dst src));
17665
17666 format %{ "mulsd $dst, $src" %}
17667 ins_cost(150);
17668 ins_encode %{
17669 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17670 %}
17671 ins_pipe(pipe_slow);
17672 %}
17673
17674 instruct mulD_mem(regD dst, memory src) %{
17675 predicate(UseAVX == 0);
17676 match(Set dst (MulD dst (LoadD src)));
17677
17678 format %{ "mulsd $dst, $src" %}
17679 ins_cost(150);
17680 ins_encode %{
17681 __ mulsd($dst$$XMMRegister, $src$$Address);
17682 %}
17683 ins_pipe(pipe_slow);
17684 %}
17685
17686 instruct mulD_imm(regD dst, immD con) %{
17687 predicate(UseAVX == 0);
17688 match(Set dst (MulD dst con));
17689 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17690 ins_cost(150);
17691 ins_encode %{
17692 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17693 %}
17694 ins_pipe(pipe_slow);
17695 %}
17696
17697 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17698 predicate(UseAVX > 0);
17699 match(Set dst (MulD src1 src2));
17700
17701 format %{ "vmulsd $dst, $src1, $src2" %}
17702 ins_cost(150);
17703 ins_encode %{
17704 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17705 %}
17706 ins_pipe(pipe_slow);
17707 %}
17708
17709 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17710 predicate(UseAVX > 0);
17711 match(Set dst (MulD src1 (LoadD src2)));
17712
17713 format %{ "vmulsd $dst, $src1, $src2" %}
17714 ins_cost(150);
17715 ins_encode %{
17716 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17717 %}
17718 ins_pipe(pipe_slow);
17719 %}
17720
17721 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17722 predicate(UseAVX > 0);
17723 match(Set dst (MulD src con));
17724
17725 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17726 ins_cost(150);
17727 ins_encode %{
17728 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17729 %}
17730 ins_pipe(pipe_slow);
17731 %}
17732
17733 instruct divF_reg(regF dst, regF src) %{
17734 predicate(UseAVX == 0);
17735 match(Set dst (DivF dst src));
17736
17737 format %{ "divss $dst, $src" %}
17738 ins_cost(150);
17739 ins_encode %{
17740 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17741 %}
17742 ins_pipe(pipe_slow);
17743 %}
17744
17745 instruct divF_mem(regF dst, memory src) %{
17746 predicate(UseAVX == 0);
17747 match(Set dst (DivF dst (LoadF src)));
17748
17749 format %{ "divss $dst, $src" %}
17750 ins_cost(150);
17751 ins_encode %{
17752 __ divss($dst$$XMMRegister, $src$$Address);
17753 %}
17754 ins_pipe(pipe_slow);
17755 %}
17756
17757 instruct divF_imm(regF dst, immF con) %{
17758 predicate(UseAVX == 0);
17759 match(Set dst (DivF dst con));
17760 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17761 ins_cost(150);
17762 ins_encode %{
17763 __ divss($dst$$XMMRegister, $constantaddress($con));
17764 %}
17765 ins_pipe(pipe_slow);
17766 %}
17767
17768 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17769 predicate(UseAVX > 0);
17770 match(Set dst (DivF src1 src2));
17771
17772 format %{ "vdivss $dst, $src1, $src2" %}
17773 ins_cost(150);
17774 ins_encode %{
17775 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17776 %}
17777 ins_pipe(pipe_slow);
17778 %}
17779
17780 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17781 predicate(UseAVX > 0);
17782 match(Set dst (DivF src1 (LoadF src2)));
17783
17784 format %{ "vdivss $dst, $src1, $src2" %}
17785 ins_cost(150);
17786 ins_encode %{
17787 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17788 %}
17789 ins_pipe(pipe_slow);
17790 %}
17791
17792 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17793 predicate(UseAVX > 0);
17794 match(Set dst (DivF src con));
17795
17796 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17797 ins_cost(150);
17798 ins_encode %{
17799 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17800 %}
17801 ins_pipe(pipe_slow);
17802 %}
17803
17804 instruct divD_reg(regD dst, regD src) %{
17805 predicate(UseAVX == 0);
17806 match(Set dst (DivD dst src));
17807
17808 format %{ "divsd $dst, $src" %}
17809 ins_cost(150);
17810 ins_encode %{
17811 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17812 %}
17813 ins_pipe(pipe_slow);
17814 %}
17815
17816 instruct divD_mem(regD dst, memory src) %{
17817 predicate(UseAVX == 0);
17818 match(Set dst (DivD dst (LoadD src)));
17819
17820 format %{ "divsd $dst, $src" %}
17821 ins_cost(150);
17822 ins_encode %{
17823 __ divsd($dst$$XMMRegister, $src$$Address);
17824 %}
17825 ins_pipe(pipe_slow);
17826 %}
17827
17828 instruct divD_imm(regD dst, immD con) %{
17829 predicate(UseAVX == 0);
17830 match(Set dst (DivD dst con));
17831 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17832 ins_cost(150);
17833 ins_encode %{
17834 __ divsd($dst$$XMMRegister, $constantaddress($con));
17835 %}
17836 ins_pipe(pipe_slow);
17837 %}
17838
17839 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17840 predicate(UseAVX > 0);
17841 match(Set dst (DivD src1 src2));
17842
17843 format %{ "vdivsd $dst, $src1, $src2" %}
17844 ins_cost(150);
17845 ins_encode %{
17846 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17847 %}
17848 ins_pipe(pipe_slow);
17849 %}
17850
17851 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17852 predicate(UseAVX > 0);
17853 match(Set dst (DivD src1 (LoadD src2)));
17854
17855 format %{ "vdivsd $dst, $src1, $src2" %}
17856 ins_cost(150);
17857 ins_encode %{
17858 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17859 %}
17860 ins_pipe(pipe_slow);
17861 %}
17862
17863 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17864 predicate(UseAVX > 0);
17865 match(Set dst (DivD src con));
17866
17867 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17868 ins_cost(150);
17869 ins_encode %{
17870 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17871 %}
17872 ins_pipe(pipe_slow);
17873 %}
17874
17875 instruct absF_reg(regF dst) %{
17876 predicate(UseAVX == 0);
17877 match(Set dst (AbsF dst));
17878 ins_cost(150);
17879 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17880 ins_encode %{
17881 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17882 %}
17883 ins_pipe(pipe_slow);
17884 %}
17885
17886 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17887 predicate(UseAVX > 0);
17888 match(Set dst (AbsF src));
17889 ins_cost(150);
17890 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17891 ins_encode %{
17892 int vlen_enc = Assembler::AVX_128bit;
17893 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17894 ExternalAddress(float_signmask()), vlen_enc);
17895 %}
17896 ins_pipe(pipe_slow);
17897 %}
17898
17899 instruct absD_reg(regD dst) %{
17900 predicate(UseAVX == 0);
17901 match(Set dst (AbsD dst));
17902 ins_cost(150);
17903 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17904 "# abs double by sign masking" %}
17905 ins_encode %{
17906 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17907 %}
17908 ins_pipe(pipe_slow);
17909 %}
17910
17911 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17912 predicate(UseAVX > 0);
17913 match(Set dst (AbsD src));
17914 ins_cost(150);
17915 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17916 "# abs double by sign masking" %}
17917 ins_encode %{
17918 int vlen_enc = Assembler::AVX_128bit;
17919 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17920 ExternalAddress(double_signmask()), vlen_enc);
17921 %}
17922 ins_pipe(pipe_slow);
17923 %}
17924
17925 instruct negF_reg(regF dst) %{
17926 predicate(UseAVX == 0);
17927 match(Set dst (NegF dst));
17928 ins_cost(150);
17929 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17930 ins_encode %{
17931 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17932 %}
17933 ins_pipe(pipe_slow);
17934 %}
17935
17936 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17937 predicate(UseAVX > 0);
17938 match(Set dst (NegF src));
17939 ins_cost(150);
17940 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17941 ins_encode %{
17942 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17943 ExternalAddress(float_signflip()));
17944 %}
17945 ins_pipe(pipe_slow);
17946 %}
17947
17948 instruct negD_reg(regD dst) %{
17949 predicate(UseAVX == 0);
17950 match(Set dst (NegD dst));
17951 ins_cost(150);
17952 format %{ "xorpd $dst, [0x8000000000000000]\t"
17953 "# neg double by sign flipping" %}
17954 ins_encode %{
17955 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17956 %}
17957 ins_pipe(pipe_slow);
17958 %}
17959
17960 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17961 predicate(UseAVX > 0);
17962 match(Set dst (NegD src));
17963 ins_cost(150);
17964 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17965 "# neg double by sign flipping" %}
17966 ins_encode %{
17967 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17968 ExternalAddress(double_signflip()));
17969 %}
17970 ins_pipe(pipe_slow);
17971 %}
17972
// The sqrtss instruction needs its destination register pre-initialized for best
// performance, so only the rule where the input is pre-loaded into the dst register is defined below.
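//
// A sketch of the hazard being avoided (if dst != src were allowed):
//
//   sqrtss xmm1, xmm0   ; writes only xmm1[31:0], merges xmm1[127:32]
//                       ; => false dependency on the last writer of xmm1
//
// With dst == src the merged bits come from a register that is on the
// dependency chain anyway, so no extra stall is introduced. The same applies
// to sqrtsd below.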
17975 instruct sqrtF_reg(regF dst) %{
17976 match(Set dst (SqrtF dst));
17977 format %{ "sqrtss $dst, $dst" %}
17978 ins_encode %{
17979 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17980 %}
17981 ins_pipe(pipe_slow);
17982 %}
17983
// The sqrtsd instruction likewise needs its destination register pre-initialized
// for best performance, so only the rule with the input pre-loaded into dst is defined below.
17986 instruct sqrtD_reg(regD dst) %{
17987 match(Set dst (SqrtD dst));
17988 format %{ "sqrtsd $dst, $dst" %}
17989 ins_encode %{
17990 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17991 %}
17992 ins_pipe(pipe_slow);
17993 %}
17994
17995 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17996 effect(TEMP tmp);
17997 match(Set dst (ConvF2HF src));
17998 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
18000 ins_encode %{
18001 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18002 %}
18003 ins_pipe( pipe_slow );
18004 %}
18005
18006 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18007 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18008 effect(TEMP ktmp, TEMP rtmp);
18009 match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
18011 ins_encode %{
18012 __ movl($rtmp$$Register, 0x1);
18013 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18014 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18015 %}
18016 ins_pipe( pipe_slow );
18017 %}
18018
18019 instruct vconvF2HF(vec dst, vec src) %{
18020 match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
18022 ins_encode %{
18023 int vlen_enc = vector_length_encoding(this, $src);
18024 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18025 %}
18026 ins_pipe( pipe_slow );
18027 %}
18028
18029 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18030 predicate(n->as_StoreVector()->memory_size() >= 16);
18031 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18032 format %{ "vcvtps2ph $mem,$src" %}
18033 ins_encode %{
18034 int vlen_enc = vector_length_encoding(this, $src);
18035 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18036 %}
18037 ins_pipe( pipe_slow );
18038 %}
18039
18040 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18041 match(Set dst (ConvHF2F src));
18042 format %{ "vcvtph2ps $dst,$src" %}
18043 ins_encode %{
18044 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18045 %}
18046 ins_pipe( pipe_slow );
18047 %}
18048
18049 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18050 match(Set dst (VectorCastHF2F (LoadVector mem)));
18051 format %{ "vcvtph2ps $dst,$mem" %}
18052 ins_encode %{
18053 int vlen_enc = vector_length_encoding(this);
18054 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18055 %}
18056 ins_pipe( pipe_slow );
18057 %}
18058
18059 instruct vconvHF2F(vec dst, vec src) %{
18060 match(Set dst (VectorCastHF2F src));
18061 ins_cost(125);
18062 format %{ "vector_conv_HF2F $dst,$src" %}
18063 ins_encode %{
18064 int vlen_enc = vector_length_encoding(this);
18065 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18066 %}
18067 ins_pipe( pipe_slow );
18068 %}
18069
18070 // ---------------------------------------- VectorReinterpret ------------------------------------
18071 instruct reinterpret_mask(kReg dst) %{
18072 predicate(n->bottom_type()->isa_vectmask() &&
18073 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18074 match(Set dst (VectorReinterpret dst));
18075 ins_cost(125);
18076 format %{ "vector_reinterpret $dst\t!" %}
18077 ins_encode %{
18078 // empty
18079 %}
18080 ins_pipe( pipe_slow );
18081 %}
18082
18083 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18084 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18085 n->bottom_type()->isa_vectmask() &&
18086 n->in(1)->bottom_type()->isa_vectmask() &&
18087 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same bits
18089 match(Set dst (VectorReinterpret src));
18090 effect(TEMP xtmp);
18091 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18092 ins_encode %{
18093 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18094 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18095 assert(src_sz == dst_sz , "src and dst size mismatch");
18096 int vlen_enc = vector_length_encoding(src_sz);
18097 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18098 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18099 %}
18100 ins_pipe( pipe_slow );
18101 %}
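
// A worked example of the W2B rule above, assuming a 512-bit payload: a
// 32-lane short mask and a 64-lane byte mask describe the same 64 bytes, so
//
//   evpmovm2w zmm_t, k_src   ; each of 32 mask bits -> a 16-bit lane of all 1s/0s
//   evpmovb2m k_dst, zmm_t   ; each of the 64 bytes  -> one bit of the byte mask
//
// turns every short-mask bit into two adjacent byte-mask bits.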
18102
18103 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18104 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18105 n->bottom_type()->isa_vectmask() &&
18106 n->in(1)->bottom_type()->isa_vectmask() &&
18107 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18108 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same bits
18110 match(Set dst (VectorReinterpret src));
18111 effect(TEMP xtmp);
18112 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18113 ins_encode %{
18114 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18115 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18116 assert(src_sz == dst_sz , "src and dst size mismatch");
18117 int vlen_enc = vector_length_encoding(src_sz);
18118 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18119 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18120 %}
18121 ins_pipe( pipe_slow );
18122 %}
18123
18124 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18125 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18126 n->bottom_type()->isa_vectmask() &&
18127 n->in(1)->bottom_type()->isa_vectmask() &&
18128 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18129 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same bits
18131 match(Set dst (VectorReinterpret src));
18132 effect(TEMP xtmp);
18133 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18134 ins_encode %{
18135 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18136 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18137 assert(src_sz == dst_sz , "src and dst size mismatch");
18138 int vlen_enc = vector_length_encoding(src_sz);
18139 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18140 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18141 %}
18142 ins_pipe( pipe_slow );
18143 %}
18144
18145 instruct reinterpret(vec dst) %{
18146 predicate(!n->bottom_type()->isa_vectmask() &&
18147 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18148 match(Set dst (VectorReinterpret dst));
18149 ins_cost(125);
18150 format %{ "vector_reinterpret $dst\t!" %}
18151 ins_encode %{
18152 // empty
18153 %}
18154 ins_pipe( pipe_slow );
18155 %}
18156
18157 instruct reinterpret_expand(vec dst, vec src) %{
18158 predicate(UseAVX == 0 &&
18159 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18160 match(Set dst (VectorReinterpret src));
18161 ins_cost(125);
18162 effect(TEMP dst);
18163 format %{ "vector_reinterpret_expand $dst,$src" %}
18164 ins_encode %{
18165 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18166 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18167
18168 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18169 if (src_vlen_in_bytes == 4) {
18170 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18171 } else {
18172 assert(src_vlen_in_bytes == 8, "");
18173 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18174 }
18175 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18176 %}
18177 ins_pipe( pipe_slow );
18178 %}
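
// A worked example of the expand rule above, assuming vector_64_bit_mask sets
// only its low 64 bits: widening an 8-byte vector into a 16-byte register must
// zero the upper lanes, so
//
//   movdqu xmm_dst, [vector_64_bit_mask]   ; 00..00 FF FF FF FF FF FF FF FF
//   pand   xmm_dst, xmm_src                ; keep the low 8 bytes, zero the rest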
18179
18180 instruct vreinterpret_expand4(legVec dst, vec src) %{
18181 predicate(UseAVX > 0 &&
18182 !n->bottom_type()->isa_vectmask() &&
18183 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18184 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18185 match(Set dst (VectorReinterpret src));
18186 ins_cost(125);
18187 format %{ "vector_reinterpret_expand $dst,$src" %}
18188 ins_encode %{
18189 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18190 %}
18191 ins_pipe( pipe_slow );
18192 %}
18193
18194
18195 instruct vreinterpret_expand(legVec dst, vec src) %{
18196 predicate(UseAVX > 0 &&
18197 !n->bottom_type()->isa_vectmask() &&
18198 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18199 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18200 match(Set dst (VectorReinterpret src));
18201 ins_cost(125);
18202 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18203 ins_encode %{
18204 switch (Matcher::vector_length_in_bytes(this, $src)) {
18205 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18206 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18207 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18208 default: ShouldNotReachHere();
18209 }
18210 %}
18211 ins_pipe( pipe_slow );
18212 %}
18213
18214 instruct reinterpret_shrink(vec dst, legVec src) %{
18215 predicate(!n->bottom_type()->isa_vectmask() &&
18216 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18217 match(Set dst (VectorReinterpret src));
18218 ins_cost(125);
18219 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18220 ins_encode %{
18221 switch (Matcher::vector_length_in_bytes(this)) {
18222 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18223 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18224 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18225 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18226 default: ShouldNotReachHere();
18227 }
18228 %}
18229 ins_pipe( pipe_slow );
18230 %}
18231
18232 // ----------------------------------------------------------------------------------------------------
18233
18234 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18235 match(Set dst (RoundDoubleMode src rmode));
18236 format %{ "roundsd $dst,$src" %}
18237 ins_cost(150);
18238 ins_encode %{
18239 assert(UseSSE >= 4, "required");
18240 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18241 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18242 }
18243 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18244 %}
18245 ins_pipe(pipe_slow);
18246 %}
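
// Why the pxor above (non-AVX, dst != src only): roundsd writes only
// dst[63:0] and merges dst[127:64], e.g.
//
//   pxor    xmm1, xmm1          ; break dependency on xmm1's last writer
//   roundsd xmm1, xmm0, mode    ; partial write, upper bits merged from xmm1
//
// mirroring the dst == src restriction used by the sqrt rules earlier.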
18247
18248 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18249 match(Set dst (RoundDoubleMode con rmode));
18250 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18251 ins_cost(150);
18252 ins_encode %{
18253 assert(UseSSE >= 4, "required");
18254 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18255 %}
18256 ins_pipe(pipe_slow);
18257 %}
18258
18259 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18260 predicate(Matcher::vector_length(n) < 8);
18261 match(Set dst (RoundDoubleModeV src rmode));
18262 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18263 ins_encode %{
18264 assert(UseAVX > 0, "required");
18265 int vlen_enc = vector_length_encoding(this);
18266 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18267 %}
18268 ins_pipe( pipe_slow );
18269 %}
18270
18271 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18272 predicate(Matcher::vector_length(n) == 8);
18273 match(Set dst (RoundDoubleModeV src rmode));
18274 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18275 ins_encode %{
18276 assert(UseAVX > 2, "required");
18277 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18278 %}
18279 ins_pipe( pipe_slow );
18280 %}
18281
18282 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18283 predicate(Matcher::vector_length(n) < 8);
18284 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18285 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18286 ins_encode %{
18287 assert(UseAVX > 0, "required");
18288 int vlen_enc = vector_length_encoding(this);
18289 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18290 %}
18291 ins_pipe( pipe_slow );
18292 %}
18293
18294 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18295 predicate(Matcher::vector_length(n) == 8);
18296 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18297 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18298 ins_encode %{
18299 assert(UseAVX > 2, "required");
18300 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18301 %}
18302 ins_pipe( pipe_slow );
18303 %}
18304
18305 instruct onspinwait() %{
18306 match(OnSpinWait);
18307 ins_cost(200);
18308
18309 format %{
18310 $$template
18311 $$emit$$"pause\t! membar_onspinwait"
18312 %}
18313 ins_encode %{
18314 __ pause();
18315 %}
18316 ins_pipe(pipe_slow);
18317 %}
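
// pause (F3 90) decodes as rep;nop on older cores, so it is always safe to
// emit. In a spin loop it hints the core to yield pipeline resources and
// cheapens the loop-exit mispredict; a typical use is
//
//   spin: pause              ; membar_onspinwait
//         cmpb  [flag], 0
//         je    spin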
18318
18319 // a * b + c
18320 instruct fmaD_reg(regD a, regD b, regD c) %{
18321 match(Set c (FmaD c (Binary a b)));
18322 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18323 ins_cost(150);
18324 ins_encode %{
18325 assert(UseFMA, "Needs FMA instructions support.");
18326 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18327 %}
18328 ins_pipe( pipe_slow );
18329 %}
18330
18331 // a * b + c
18332 instruct fmaF_reg(regF a, regF b, regF c) %{
18333 match(Set c (FmaF c (Binary a b)));
18334 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18335 ins_cost(150);
18336 ins_encode %{
18337 assert(UseFMA, "Needs FMA instructions support.");
18338 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18339 %}
18340 ins_pipe( pipe_slow );
18341 %}
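
// Both FMA rules above compute a*b + c with a single rounding step (the
// Math.fma contract). A minimal sketch of the emitted instruction, assuming
// the macro assembler picks the 231 form that accumulates into $c:
//
//   vfmadd231sd xmm_c, xmm_a, xmm_b   ; xmm_c = xmm_a * xmm_b + xmm_c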
18342
18343 // ====================VECTOR INSTRUCTIONS=====================================
18344
18345 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18346 instruct MoveVec2Leg(legVec dst, vec src) %{
18347 match(Set dst src);
18348 format %{ "" %}
18349 ins_encode %{
18350 ShouldNotReachHere();
18351 %}
18352 ins_pipe( fpu_reg_reg );
18353 %}
18354
18355 instruct MoveLeg2Vec(vec dst, legVec src) %{
18356 match(Set dst src);
18357 format %{ "" %}
18358 ins_encode %{
18359 ShouldNotReachHere();
18360 %}
18361 ins_pipe( fpu_reg_reg );
18362 %}
18363
18364 // ============================================================================
18365
18366 // Load vectors generic operand pattern
18367 instruct loadV(vec dst, memory mem) %{
18368 match(Set dst (LoadVector mem));
18369 ins_cost(125);
18370 format %{ "load_vector $dst,$mem" %}
18371 ins_encode %{
18372 BasicType bt = Matcher::vector_element_basic_type(this);
18373 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18374 %}
18375 ins_pipe( pipe_slow );
18376 %}
18377
18378 // Store vectors generic operand pattern.
18379 instruct storeV(memory mem, vec src) %{
18380 match(Set mem (StoreVector mem src));
18381 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18383 ins_encode %{
18384 switch (Matcher::vector_length_in_bytes(this, $src)) {
18385 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18386 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18387 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18388 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18389 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18390 default: ShouldNotReachHere();
18391 }
18392 %}
18393 ins_pipe( pipe_slow );
18394 %}
18395
18396 // ---------------------------------------- Gather ------------------------------------
18397
18398 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18399
18400 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18401 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18402 Matcher::vector_length_in_bytes(n) <= 32);
18403 match(Set dst (LoadVectorGather mem idx));
18404 effect(TEMP dst, TEMP tmp, TEMP mask);
18405 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18406 ins_encode %{
18407 int vlen_enc = vector_length_encoding(this);
18408 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18409 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18410 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18411 __ lea($tmp$$Register, $mem$$Address);
18412 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18413 %}
18414 ins_pipe( pipe_slow );
18415 %}
18416
18417
18418 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18419 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18420 !is_subword_type(Matcher::vector_element_basic_type(n)));
18421 match(Set dst (LoadVectorGather mem idx));
18422 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18424 ins_encode %{
18425 int vlen_enc = vector_length_encoding(this);
18426 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18427 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18428 __ lea($tmp$$Register, $mem$$Address);
18429 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18430 %}
18431 ins_pipe( pipe_slow );
18432 %}
18433
18434 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18435 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18436 !is_subword_type(Matcher::vector_element_basic_type(n)));
18437 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18438 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18440 ins_encode %{
18441 assert(UseAVX > 2, "sanity");
18442 int vlen_enc = vector_length_encoding(this);
18443 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18444 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially clears the opmask register used
    // for predication as elements complete, hence the mask operand is first
    // copied to a temporary. The destination is zeroed below so that lanes
    // whose mask bit is clear read as zero.
18447 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18448 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18449 __ lea($tmp$$Register, $mem$$Address);
18450 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18451 %}
18452 ins_pipe( pipe_slow );
18453 %}
18454
18455 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18456 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18457 match(Set dst (LoadVectorGather mem idx_base));
18458 effect(TEMP tmp, TEMP rtmp);
18459 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18460 ins_encode %{
18461 int vlen_enc = vector_length_encoding(this);
18462 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18463 __ lea($tmp$$Register, $mem$$Address);
18464 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18465 %}
18466 ins_pipe( pipe_slow );
18467 %}
18468
18469 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18470 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18471 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18472 match(Set dst (LoadVectorGather mem idx_base));
18473 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18474 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18475 ins_encode %{
18476 int vlen_enc = vector_length_encoding(this);
18477 int vector_len = Matcher::vector_length(this);
18478 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18479 __ lea($tmp$$Register, $mem$$Address);
18480 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18481 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18482 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18483 %}
18484 ins_pipe( pipe_slow );
18485 %}
18486
18487 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18488 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18489 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18490 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18491 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18492 ins_encode %{
18493 int vlen_enc = vector_length_encoding(this);
18494 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18495 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18496 __ lea($tmp$$Register, $mem$$Address);
18497 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18498 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18499 %}
18500 ins_pipe( pipe_slow );
18501 %}
18502
18503 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18504 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18505 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18506 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18507 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18508 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18509 ins_encode %{
18510 int vlen_enc = vector_length_encoding(this);
18511 int vector_len = Matcher::vector_length(this);
18512 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18513 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18514 __ lea($tmp$$Register, $mem$$Address);
18515 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18516 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18517 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18518 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18519 %}
18520 ins_pipe( pipe_slow );
18521 %}
18522
18523 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18524 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18525 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18526 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18527 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18528 ins_encode %{
18529 int vlen_enc = vector_length_encoding(this);
18530 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18531 __ lea($tmp$$Register, $mem$$Address);
18532 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18533 if (elem_bt == T_SHORT) {
18534 __ movl($mask_idx$$Register, 0x55555555);
18535 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18536 }
18537 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18538 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18539 %}
18540 ins_pipe( pipe_slow );
18541 %}
18542
18543 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18544 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18545 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18546 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18547 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18548 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18549 ins_encode %{
18550 int vlen_enc = vector_length_encoding(this);
18551 int vector_len = Matcher::vector_length(this);
18552 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18553 __ lea($tmp$$Register, $mem$$Address);
18554 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18555 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18556 if (elem_bt == T_SHORT) {
18557 __ movl($mask_idx$$Register, 0x55555555);
18558 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18559 }
18560 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18561 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18562 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18563 %}
18564 ins_pipe( pipe_slow );
18565 %}
18566
18567 // ====================Scatter=======================================
18568
18569 // Scatter INT, LONG, FLOAT, DOUBLE
18570
18571 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18572 predicate(UseAVX > 2);
18573 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18574 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18576 ins_encode %{
18577 int vlen_enc = vector_length_encoding(this, $src);
18578 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18579
18580 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18581 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18582
18583 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18584 __ lea($tmp$$Register, $mem$$Address);
18585 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18586 %}
18587 ins_pipe( pipe_slow );
18588 %}
18589
18590 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18591 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18592 effect(TEMP tmp, TEMP ktmp);
18593 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18594 ins_encode %{
18595 int vlen_enc = vector_length_encoding(this, $src);
18596 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18597 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18598 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is first copied into a temporary.
18601 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18602 __ lea($tmp$$Register, $mem$$Address);
18603 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18604 %}
18605 ins_pipe( pipe_slow );
18606 %}
18607
18608 // ====================REPLICATE=======================================
18609
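// The replicate rules below choose among three strategies: a direct GPR
// broadcast (evpbroadcast*) where AVX512VL/BW permits, an XMM round trip
// (movdl + vpbroadcast*) on AVX2, and a pure SSE shuffle sequence otherwise.
// The SSE byte path, for example, fans a single byte out as follows:
//   movdl      xmm, r          // byte in lane 0
//   punpcklbw  xmm, xmm        // 2 copies
//   pshuflw    xmm, xmm, 0x00  // 8 copies in the low qword
//   punpcklqdq xmm, xmm        // 16 copies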
18610 // Replicate byte scalar to be vector
18611 instruct vReplB_reg(vec dst, rRegI src) %{
18612 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18613 match(Set dst (Replicate src));
18614 format %{ "replicateB $dst,$src" %}
18615 ins_encode %{
18616 uint vlen = Matcher::vector_length(this);
18617 if (UseAVX >= 2) {
18618 int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL+BW needed for <512-bit operands
18620 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18621 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18622 } else {
18623 __ movdl($dst$$XMMRegister, $src$$Register);
18624 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18625 }
18626 } else {
18627 assert(UseAVX < 2, "");
18628 __ movdl($dst$$XMMRegister, $src$$Register);
18629 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18630 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18631 if (vlen >= 16) {
18632 assert(vlen == 16, "");
18633 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18634 }
18635 }
18636 %}
18637 ins_pipe( pipe_slow );
18638 %}
18639
18640 instruct ReplB_mem(vec dst, memory mem) %{
18641 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18642 match(Set dst (Replicate (LoadB mem)));
18643 format %{ "replicateB $dst,$mem" %}
18644 ins_encode %{
18645 int vlen_enc = vector_length_encoding(this);
18646 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18647 %}
18648 ins_pipe( pipe_slow );
18649 %}
18650
18651 // ====================ReplicateS=======================================
18652
18653 instruct vReplS_reg(vec dst, rRegI src) %{
18654 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18655 match(Set dst (Replicate src));
18656 format %{ "replicateS $dst,$src" %}
18657 ins_encode %{
18658 uint vlen = Matcher::vector_length(this);
18659 int vlen_enc = vector_length_encoding(this);
18660 if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL+BW needed for <512-bit operands
18662 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18663 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18664 } else {
18665 __ movdl($dst$$XMMRegister, $src$$Register);
18666 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18667 }
18668 } else {
18669 assert(UseAVX < 2, "");
18670 __ movdl($dst$$XMMRegister, $src$$Register);
18671 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18672 if (vlen >= 8) {
18673 assert(vlen == 8, "");
18674 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18675 }
18676 }
18677 %}
18678 ins_pipe( pipe_slow );
18679 %}
18680
18681 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18682 match(Set dst (Replicate con));
18683 effect(TEMP rtmp);
18684 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18685 ins_encode %{
18686 int vlen_enc = vector_length_encoding(this);
18687 BasicType bt = Matcher::vector_element_basic_type(this);
18688 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18689 __ movl($rtmp$$Register, $con$$constant);
18690 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18691 %}
18692 ins_pipe( pipe_slow );
18693 %}
18694
18695 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18696 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18697 match(Set dst (Replicate src));
18698 effect(TEMP rtmp);
18699 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18700 ins_encode %{
18701 int vlen_enc = vector_length_encoding(this);
18702 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18703 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18704 %}
18705 ins_pipe( pipe_slow );
18706 %}
18707
18708 instruct ReplS_mem(vec dst, memory mem) %{
18709 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18710 match(Set dst (Replicate (LoadS mem)));
18711 format %{ "replicateS $dst,$mem" %}
18712 ins_encode %{
18713 int vlen_enc = vector_length_encoding(this);
18714 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18715 %}
18716 ins_pipe( pipe_slow );
18717 %}
18718
18719 // ====================ReplicateI=======================================
18720
18721 instruct ReplI_reg(vec dst, rRegI src) %{
18722 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18723 match(Set dst (Replicate src));
18724 format %{ "replicateI $dst,$src" %}
18725 ins_encode %{
18726 uint vlen = Matcher::vector_length(this);
18727 int vlen_enc = vector_length_encoding(this);
18728 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18729 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18730 } else if (VM_Version::supports_avx2()) {
18731 __ movdl($dst$$XMMRegister, $src$$Register);
18732 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18733 } else {
18734 __ movdl($dst$$XMMRegister, $src$$Register);
18735 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18736 }
18737 %}
18738 ins_pipe( pipe_slow );
18739 %}
18740
18741 instruct ReplI_mem(vec dst, memory mem) %{
18742 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18743 match(Set dst (Replicate (LoadI mem)));
18744 format %{ "replicateI $dst,$mem" %}
18745 ins_encode %{
18746 int vlen_enc = vector_length_encoding(this);
18747 if (VM_Version::supports_avx2()) {
18748 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18749 } else if (VM_Version::supports_avx()) {
18750 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18751 } else {
18752 __ movdl($dst$$XMMRegister, $mem$$Address);
18753 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18754 }
18755 %}
18756 ins_pipe( pipe_slow );
18757 %}
18758
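// Immediate replication goes through the constant table: vreplicate_imm packs
// enough copies of the immediate to fill one table entry (4 bytes when AVX can
// broadcast it, 8 bytes for the SSE3 movddup path, a full 16 bytes otherwise),
// and load_constant_vector then widens that entry to the requested vector size.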
18759 instruct ReplI_imm(vec dst, immI con) %{
18760 predicate(Matcher::is_non_long_integral_vector(n));
18761 match(Set dst (Replicate con));
18762 format %{ "replicateI $dst,$con" %}
18763 ins_encode %{
18764 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18765 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18766 type2aelembytes(Matcher::vector_element_basic_type(this))));
18767 BasicType bt = Matcher::vector_element_basic_type(this);
18768 int vlen = Matcher::vector_length_in_bytes(this);
18769 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18770 %}
18771 ins_pipe( pipe_slow );
18772 %}
18773
18774 // Replicate scalar zero to be vector
18775 instruct ReplI_zero(vec dst, immI_0 zero) %{
18776 predicate(Matcher::is_non_long_integral_vector(n));
18777 match(Set dst (Replicate zero));
18778 format %{ "replicateI $dst,$zero" %}
18779 ins_encode %{
18780 int vlen_enc = vector_length_encoding(this);
18781 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18782 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18783 } else {
18784 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18785 }
18786 %}
18787 ins_pipe( fpu_reg_reg );
18788 %}
18789
18790 instruct ReplI_M1(vec dst, immI_M1 con) %{
18791 predicate(Matcher::is_non_long_integral_vector(n));
18792 match(Set dst (Replicate con));
18793 format %{ "vallones $dst" %}
18794 ins_encode %{
18795 int vector_len = vector_length_encoding(this);
18796 __ vallones($dst$$XMMRegister, vector_len);
18797 %}
18798 ins_pipe( pipe_slow );
18799 %}
18800
18801 // ====================ReplicateL=======================================
18802
18803 // Replicate long (8 byte) scalar to be vector
18804 instruct ReplL_reg(vec dst, rRegL src) %{
18805 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18806 match(Set dst (Replicate src));
18807 format %{ "replicateL $dst,$src" %}
18808 ins_encode %{
18809 int vlen = Matcher::vector_length(this);
18810 int vlen_enc = vector_length_encoding(this);
18811 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18812 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18813 } else if (VM_Version::supports_avx2()) {
18814 __ movdq($dst$$XMMRegister, $src$$Register);
18815 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18816 } else {
18817 __ movdq($dst$$XMMRegister, $src$$Register);
18818 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18819 }
18820 %}
18821 ins_pipe( pipe_slow );
18822 %}
18823
18824 instruct ReplL_mem(vec dst, memory mem) %{
18825 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18826 match(Set dst (Replicate (LoadL mem)));
18827 format %{ "replicateL $dst,$mem" %}
18828 ins_encode %{
18829 int vlen_enc = vector_length_encoding(this);
18830 if (VM_Version::supports_avx2()) {
18831 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18832 } else if (VM_Version::supports_sse3()) {
18833 __ movddup($dst$$XMMRegister, $mem$$Address);
18834 } else {
18835 __ movq($dst$$XMMRegister, $mem$$Address);
18836 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18837 }
18838 %}
18839 ins_pipe( pipe_slow );
18840 %}
18841
18842 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18843 instruct ReplL_imm(vec dst, immL con) %{
18844 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18845 match(Set dst (Replicate con));
18846 format %{ "replicateL $dst,$con" %}
18847 ins_encode %{
18848 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18849 int vlen = Matcher::vector_length_in_bytes(this);
18850 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18851 %}
18852 ins_pipe( pipe_slow );
18853 %}
18854
18855 instruct ReplL_zero(vec dst, immL0 zero) %{
18856 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18857 match(Set dst (Replicate zero));
18858 format %{ "replicateL $dst,$zero" %}
18859 ins_encode %{
18860 int vlen_enc = vector_length_encoding(this);
18861 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18862 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18863 } else {
18864 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18865 }
18866 %}
18867 ins_pipe( fpu_reg_reg );
18868 %}
18869
18870 instruct ReplL_M1(vec dst, immL_M1 con) %{
18871 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18872 match(Set dst (Replicate con));
18873 format %{ "vallones $dst" %}
18874 ins_encode %{
18875 int vector_len = vector_length_encoding(this);
18876 __ vallones($dst$$XMMRegister, vector_len);
18877 %}
18878 ins_pipe( pipe_slow );
18879 %}
18880
18881 // ====================ReplicateF=======================================
18882
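// In-register float broadcasts use vpermilps with an all-zero selector to
// smear lane 0 across a 128-bit lane; the reg-to-reg form of vbroadcastss
// only exists from AVX2 on, so the 256-bit AVX1 fallback broadcasts within
// 128 bits and mirrors the result into the high half with vinsertf128.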
18883 instruct vReplF_reg(vec dst, vlRegF src) %{
18884 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18885 match(Set dst (Replicate src));
18886 format %{ "replicateF $dst,$src" %}
18887 ins_encode %{
18888 uint vlen = Matcher::vector_length(this);
18889 int vlen_enc = vector_length_encoding(this);
18890 if (vlen <= 4) {
18891 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18892 } else if (VM_Version::supports_avx2()) {
18893 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18894 } else {
18895 assert(vlen == 8, "sanity");
18896 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18897 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18898 }
18899 %}
18900 ins_pipe( pipe_slow );
18901 %}
18902
18903 instruct ReplF_reg(vec dst, vlRegF src) %{
18904 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18905 match(Set dst (Replicate src));
18906 format %{ "replicateF $dst,$src" %}
18907 ins_encode %{
18908 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18909 %}
18910 ins_pipe( pipe_slow );
18911 %}
18912
18913 instruct ReplF_mem(vec dst, memory mem) %{
18914 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18915 match(Set dst (Replicate (LoadF mem)));
18916 format %{ "replicateF $dst,$mem" %}
18917 ins_encode %{
18918 int vlen_enc = vector_length_encoding(this);
18919 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18920 %}
18921 ins_pipe( pipe_slow );
18922 %}
18923
18924 // Replicate float scalar immediate to be vector by loading from const table.
18925 instruct ReplF_imm(vec dst, immF con) %{
18926 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18927 match(Set dst (Replicate con));
18928 format %{ "replicateF $dst,$con" %}
18929 ins_encode %{
18930 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18931 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18932 int vlen = Matcher::vector_length_in_bytes(this);
18933 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18934 %}
18935 ins_pipe( pipe_slow );
18936 %}
18937
18938 instruct ReplF_zero(vec dst, immF0 zero) %{
18939 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18940 match(Set dst (Replicate zero));
18941 format %{ "replicateF $dst,$zero" %}
18942 ins_encode %{
18943 int vlen_enc = vector_length_encoding(this);
18944 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18945 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18946 } else {
18947 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18948 }
18949 %}
18950 ins_pipe( fpu_reg_reg );
18951 %}
18952
18953 // ====================ReplicateD=======================================
18954
18955 // Replicate double (8 bytes) scalar to be vector
18956 instruct vReplD_reg(vec dst, vlRegD src) %{
18957 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18958 match(Set dst (Replicate src));
18959 format %{ "replicateD $dst,$src" %}
18960 ins_encode %{
18961 uint vlen = Matcher::vector_length(this);
18962 int vlen_enc = vector_length_encoding(this);
18963 if (vlen <= 2) {
18964 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18965 } else if (VM_Version::supports_avx2()) {
18966 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18967 } else {
18968 assert(vlen == 4, "sanity");
18969 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18970 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18971 }
18972 %}
18973 ins_pipe( pipe_slow );
18974 %}
18975
18976 instruct ReplD_reg(vec dst, vlRegD src) %{
18977 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18978 match(Set dst (Replicate src));
18979 format %{ "replicateD $dst,$src" %}
18980 ins_encode %{
18981 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18982 %}
18983 ins_pipe( pipe_slow );
18984 %}
18985
18986 instruct ReplD_mem(vec dst, memory mem) %{
18987 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18988 match(Set dst (Replicate (LoadD mem)));
18989 format %{ "replicateD $dst,$mem" %}
18990 ins_encode %{
18991 if (Matcher::vector_length(this) >= 4) {
18992 int vlen_enc = vector_length_encoding(this);
18993 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18994 } else {
18995 __ movddup($dst$$XMMRegister, $mem$$Address);
18996 }
18997 %}
18998 ins_pipe( pipe_slow );
18999 %}
19000
19001 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19002 instruct ReplD_imm(vec dst, immD con) %{
19003 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19004 match(Set dst (Replicate con));
19005 format %{ "replicateD $dst,$con" %}
19006 ins_encode %{
19007 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19008 int vlen = Matcher::vector_length_in_bytes(this);
19009 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19010 %}
19011 ins_pipe( pipe_slow );
19012 %}
19013
19014 instruct ReplD_zero(vec dst, immD0 zero) %{
19015 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19016 match(Set dst (Replicate zero));
19017 format %{ "replicateD $dst,$zero" %}
19018 ins_encode %{
19019 int vlen_enc = vector_length_encoding(this);
19020 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19021 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19022 } else {
19023 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19024 }
19025 %}
19026 ins_pipe( fpu_reg_reg );
19027 %}
19028
19029 // ====================VECTOR INSERT=======================================
19030
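// Inserts into vectors wider than 128 bits are decomposed per 128-bit lane:
// extract the lane holding the element, insert within it, then write the lane
// back. x_idx is the element's position inside its lane and y_idx selects the
// lane; e.g. a T_INT insert at idx 13 into a 512-bit vector (4 ints per lane)
// yields x_idx = 13 & 3 = 1 and y_idx = 13 >> 2 = 3.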
19031 instruct insert(vec dst, rRegI val, immU8 idx) %{
19032 predicate(Matcher::vector_length_in_bytes(n) < 32);
19033 match(Set dst (VectorInsert (Binary dst val) idx));
19034 format %{ "vector_insert $dst,$val,$idx" %}
19035 ins_encode %{
19036 assert(UseSSE >= 4, "required");
19037 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19038
19039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19040
19041 assert(is_integral_type(elem_bt), "");
19042 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19043
19044 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19045 %}
19046 ins_pipe( pipe_slow );
19047 %}
19048
19049 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19050 predicate(Matcher::vector_length_in_bytes(n) == 32);
19051 match(Set dst (VectorInsert (Binary src val) idx));
19052 effect(TEMP vtmp);
19053 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19054 ins_encode %{
19055 int vlen_enc = Assembler::AVX_256bit;
19056 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19057 int elem_per_lane = 16/type2aelembytes(elem_bt);
19058 int log2epr = log2(elem_per_lane);
19059
19060 assert(is_integral_type(elem_bt), "sanity");
19061 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19062
19063 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19064 uint y_idx = ($idx$$constant >> log2epr) & 1;
19065 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19066 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19067 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19068 %}
19069 ins_pipe( pipe_slow );
19070 %}
19071
19072 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19073 predicate(Matcher::vector_length_in_bytes(n) == 64);
19074 match(Set dst (VectorInsert (Binary src val) idx));
19075 effect(TEMP vtmp);
19076 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19077 ins_encode %{
19078 assert(UseAVX > 2, "sanity");
19079
19080 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19081 int elem_per_lane = 16/type2aelembytes(elem_bt);
19082 int log2epr = log2(elem_per_lane);
19083
19084 assert(is_integral_type(elem_bt), "");
19085 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19086
19087 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19088 uint y_idx = ($idx$$constant >> log2epr) & 3;
19089 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19090 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19091 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19092 %}
19093 ins_pipe( pipe_slow );
19094 %}
19095
19096 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19097 predicate(Matcher::vector_length(n) == 2);
19098 match(Set dst (VectorInsert (Binary dst val) idx));
19099 format %{ "vector_insert $dst,$val,$idx" %}
19100 ins_encode %{
19101 assert(UseSSE >= 4, "required");
19102 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19103 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19104
19105 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19106 %}
19107 ins_pipe( pipe_slow );
19108 %}
19109
19110 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19111 predicate(Matcher::vector_length(n) == 4);
19112 match(Set dst (VectorInsert (Binary src val) idx));
19113 effect(TEMP vtmp);
19114 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19115 ins_encode %{
19116 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19117 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19118
19119 uint x_idx = $idx$$constant & right_n_bits(1);
19120 uint y_idx = ($idx$$constant >> 1) & 1;
19121 int vlen_enc = Assembler::AVX_256bit;
19122 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19123 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19124 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19125 %}
19126 ins_pipe( pipe_slow );
19127 %}
19128
19129 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19130 predicate(Matcher::vector_length(n) == 8);
19131 match(Set dst (VectorInsert (Binary src val) idx));
19132 effect(TEMP vtmp);
19133 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19134 ins_encode %{
19135 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19136 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19137
19138 uint x_idx = $idx$$constant & right_n_bits(1);
19139 uint y_idx = ($idx$$constant >> 1) & 3;
19140 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19141 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19142 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19143 %}
19144 ins_pipe( pipe_slow );
19145 %}
19146
19147 instruct insertF(vec dst, regF val, immU8 idx) %{
19148 predicate(Matcher::vector_length(n) < 8);
19149 match(Set dst (VectorInsert (Binary dst val) idx));
19150 format %{ "vector_insert $dst,$val,$idx" %}
19151 ins_encode %{
19152 assert(UseSSE >= 4, "sanity");
19153
19154 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19155 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19156
19157 uint x_idx = $idx$$constant & right_n_bits(2);
19158 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19159 %}
19160 ins_pipe( pipe_slow );
19161 %}
19162
19163 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19164 predicate(Matcher::vector_length(n) >= 8);
19165 match(Set dst (VectorInsert (Binary src val) idx));
19166 effect(TEMP vtmp);
19167 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19168 ins_encode %{
19169 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19170 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19171
19172 int vlen = Matcher::vector_length(this);
19173 uint x_idx = $idx$$constant & right_n_bits(2);
19174 if (vlen == 8) {
19175 uint y_idx = ($idx$$constant >> 2) & 1;
19176 int vlen_enc = Assembler::AVX_256bit;
19177 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19178 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19179 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19180 } else {
19181 assert(vlen == 16, "sanity");
19182 uint y_idx = ($idx$$constant >> 2) & 3;
19183 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19184 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19185 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19186 }
19187 %}
19188 ins_pipe( pipe_slow );
19189 %}
19190
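// Double inserts bounce the value through a GPR because pinsrq/vpinsrq take
// an integer register source; there is no insertps-style XMM-to-XMM insert
// for 64-bit lanes.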
19191 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19192 predicate(Matcher::vector_length(n) == 2);
19193 match(Set dst (VectorInsert (Binary dst val) idx));
19194 effect(TEMP tmp);
19195 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19196 ins_encode %{
19197 assert(UseSSE >= 4, "sanity");
19198 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19199 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19200
19201 __ movq($tmp$$Register, $val$$XMMRegister);
19202 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19203 %}
19204 ins_pipe( pipe_slow );
19205 %}
19206
19207 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19208 predicate(Matcher::vector_length(n) == 4);
19209 match(Set dst (VectorInsert (Binary src val) idx));
19210 effect(TEMP vtmp, TEMP tmp);
19211 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19212 ins_encode %{
19213 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19214 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19215
19216 uint x_idx = $idx$$constant & right_n_bits(1);
19217 uint y_idx = ($idx$$constant >> 1) & 1;
19218 int vlen_enc = Assembler::AVX_256bit;
19219 __ movq($tmp$$Register, $val$$XMMRegister);
19220 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19221 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19222 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19223 %}
19224 ins_pipe( pipe_slow );
19225 %}
19226
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19228 predicate(Matcher::vector_length(n) == 8);
19229 match(Set dst (VectorInsert (Binary src val) idx));
19230 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19232 ins_encode %{
19233 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19234 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19235
19236 uint x_idx = $idx$$constant & right_n_bits(1);
19237 uint y_idx = ($idx$$constant >> 1) & 3;
19238 __ movq($tmp$$Register, $val$$XMMRegister);
19239 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19240 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19241 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19242 %}
19243 ins_pipe( pipe_slow );
19244 %}
19245
19246 // ====================REDUCTION ARITHMETIC=======================================
19247
19248 // =======================Int Reduction==========================================
19249
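// Integer reductions combine the scalar carry-in $src1 with all lanes of
// $src2. The reduceI helper in the C2 macro assembler folds the vector down,
// broadly by repeatedly combining the upper half with the lower half until a
// single lane remains, and then applies the operation against $src1.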
19250 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19251 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19252 match(Set dst (AddReductionVI src1 src2));
19253 match(Set dst (MulReductionVI src1 src2));
19254 match(Set dst (AndReductionV src1 src2));
19255 match(Set dst ( OrReductionV src1 src2));
19256 match(Set dst (XorReductionV src1 src2));
19257 match(Set dst (MinReductionV src1 src2));
19258 match(Set dst (MaxReductionV src1 src2));
19259 effect(TEMP vtmp1, TEMP vtmp2);
19260 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19261 ins_encode %{
19262 int opcode = this->ideal_Opcode();
19263 int vlen = Matcher::vector_length(this, $src2);
19264 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19265 %}
19266 ins_pipe( pipe_slow );
19267 %}
19268
19269 // =======================Long Reduction==========================================
19270
19271 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19272 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19273 match(Set dst (AddReductionVL src1 src2));
19274 match(Set dst (MulReductionVL src1 src2));
19275 match(Set dst (AndReductionV src1 src2));
19276 match(Set dst ( OrReductionV src1 src2));
19277 match(Set dst (XorReductionV src1 src2));
19278 match(Set dst (MinReductionV src1 src2));
19279 match(Set dst (MaxReductionV src1 src2));
19280 effect(TEMP vtmp1, TEMP vtmp2);
19281 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19282 ins_encode %{
19283 int opcode = this->ideal_Opcode();
19284 int vlen = Matcher::vector_length(this, $src2);
19285 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19286 %}
19287 ins_pipe( pipe_slow );
19288 %}
19289
19290 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19291 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19292 match(Set dst (AddReductionVL src1 src2));
19293 match(Set dst (MulReductionVL src1 src2));
19294 match(Set dst (AndReductionV src1 src2));
19295 match(Set dst ( OrReductionV src1 src2));
19296 match(Set dst (XorReductionV src1 src2));
19297 match(Set dst (MinReductionV src1 src2));
19298 match(Set dst (MaxReductionV src1 src2));
19299 effect(TEMP vtmp1, TEMP vtmp2);
19300 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19301 ins_encode %{
19302 int opcode = this->ideal_Opcode();
19303 int vlen = Matcher::vector_length(this, $src2);
19304 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19305 %}
19306 ins_pipe( pipe_slow );
19307 %}
19308
19309 // =======================Float Reduction==========================================
19310
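// Strictly ordered FP reductions (required by Java's sequential reduction
// semantics) accumulate into $dst lane by lane, so $dst is both the carry-in
// and the result. The unordered_* rules further down may fold lanes pairwise,
// which is only legal for the VectorAPI cases that explicitly permit it.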
19311 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19312 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19313 match(Set dst (AddReductionVF dst src));
19314 match(Set dst (MulReductionVF dst src));
19315 effect(TEMP dst, TEMP vtmp);
19316 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19317 ins_encode %{
19318 int opcode = this->ideal_Opcode();
19319 int vlen = Matcher::vector_length(this, $src);
19320 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19321 %}
19322 ins_pipe( pipe_slow );
19323 %}
19324
19325 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19326 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19327 match(Set dst (AddReductionVF dst src));
19328 match(Set dst (MulReductionVF dst src));
19329 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19330 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19331 ins_encode %{
19332 int opcode = this->ideal_Opcode();
19333 int vlen = Matcher::vector_length(this, $src);
19334 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19335 %}
19336 ins_pipe( pipe_slow );
19337 %}
19338
19339 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19340 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19341 match(Set dst (AddReductionVF dst src));
19342 match(Set dst (MulReductionVF dst src));
19343 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19344 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19345 ins_encode %{
19346 int opcode = this->ideal_Opcode();
19347 int vlen = Matcher::vector_length(this, $src);
19348 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19349 %}
19350 ins_pipe( pipe_slow );
19351 %}
19352
19353
19354 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19355 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19356 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19357 // src1 contains reduction identity
19358 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19359 match(Set dst (AddReductionVF src1 src2));
19360 match(Set dst (MulReductionVF src1 src2));
19361 effect(TEMP dst);
19362 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19363 ins_encode %{
19364 int opcode = this->ideal_Opcode();
19365 int vlen = Matcher::vector_length(this, $src2);
19366 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19367 %}
19368 ins_pipe( pipe_slow );
19369 %}
19370
19371 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19372 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19373 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19374 // src1 contains reduction identity
19375 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19376 match(Set dst (AddReductionVF src1 src2));
19377 match(Set dst (MulReductionVF src1 src2));
19378 effect(TEMP dst, TEMP vtmp);
19379 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19380 ins_encode %{
19381 int opcode = this->ideal_Opcode();
19382 int vlen = Matcher::vector_length(this, $src2);
19383 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19384 %}
19385 ins_pipe( pipe_slow );
19386 %}
19387
19388 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19389 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19390 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19391 // src1 contains reduction identity
19392 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19393 match(Set dst (AddReductionVF src1 src2));
19394 match(Set dst (MulReductionVF src1 src2));
19395 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19396 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19397 ins_encode %{
19398 int opcode = this->ideal_Opcode();
19399 int vlen = Matcher::vector_length(this, $src2);
19400 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19401 %}
19402 ins_pipe( pipe_slow );
19403 %}
19404
19405 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19406 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19407 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19408 // src1 contains reduction identity
19409 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19410 match(Set dst (AddReductionVF src1 src2));
19411 match(Set dst (MulReductionVF src1 src2));
19412 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19413 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19414 ins_encode %{
19415 int opcode = this->ideal_Opcode();
19416 int vlen = Matcher::vector_length(this, $src2);
19417 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19418 %}
19419 ins_pipe( pipe_slow );
19420 %}
19421
19422 // =======================Double Reduction==========================================
19423
19424 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19425 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19426 match(Set dst (AddReductionVD dst src));
19427 match(Set dst (MulReductionVD dst src));
19428 effect(TEMP dst, TEMP vtmp);
19429 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19430 ins_encode %{
19431 int opcode = this->ideal_Opcode();
19432 int vlen = Matcher::vector_length(this, $src);
19433 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19434 %}
19435 ins_pipe( pipe_slow );
19436 %}
19437
19438 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19439 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19440 match(Set dst (AddReductionVD dst src));
19441 match(Set dst (MulReductionVD dst src));
19442 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19443 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19444 ins_encode %{
19445 int opcode = this->ideal_Opcode();
19446 int vlen = Matcher::vector_length(this, $src);
19447 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19448 %}
19449 ins_pipe( pipe_slow );
19450 %}
19451
19452 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19453 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19454 match(Set dst (AddReductionVD dst src));
19455 match(Set dst (MulReductionVD dst src));
19456 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19457 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19458 ins_encode %{
19459 int opcode = this->ideal_Opcode();
19460 int vlen = Matcher::vector_length(this, $src);
19461 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19462 %}
19463 ins_pipe( pipe_slow );
19464 %}
19465
19466 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19467 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19468 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19469 // src1 contains reduction identity
19470 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19471 match(Set dst (AddReductionVD src1 src2));
19472 match(Set dst (MulReductionVD src1 src2));
19473 effect(TEMP dst);
19474 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19475 ins_encode %{
19476 int opcode = this->ideal_Opcode();
19477 int vlen = Matcher::vector_length(this, $src2);
19478 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19479 %}
19480 ins_pipe( pipe_slow );
19481 %}
19482
19483 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19484 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19485 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19486 // src1 contains reduction identity
19487 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19488 match(Set dst (AddReductionVD src1 src2));
19489 match(Set dst (MulReductionVD src1 src2));
19490 effect(TEMP dst, TEMP vtmp);
19491 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19492 ins_encode %{
19493 int opcode = this->ideal_Opcode();
19494 int vlen = Matcher::vector_length(this, $src2);
19495 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19496 %}
19497 ins_pipe( pipe_slow );
19498 %}
19499
19500 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19501 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19502 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19503 // src1 contains reduction identity
19504 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19505 match(Set dst (AddReductionVD src1 src2));
19506 match(Set dst (MulReductionVD src1 src2));
19507 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19508 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19509 ins_encode %{
19510 int opcode = this->ideal_Opcode();
19511 int vlen = Matcher::vector_length(this, $src2);
19512 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19513 %}
19514 ins_pipe( pipe_slow );
19515 %}
19516
19517 // =======================Byte Reduction==========================================
19518
19519 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19520 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19521 match(Set dst (AddReductionVI src1 src2));
19522 match(Set dst (AndReductionV src1 src2));
19523 match(Set dst ( OrReductionV src1 src2));
19524 match(Set dst (XorReductionV src1 src2));
19525 match(Set dst (MinReductionV src1 src2));
19526 match(Set dst (MaxReductionV src1 src2));
19527 effect(TEMP vtmp1, TEMP vtmp2);
19528 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19529 ins_encode %{
19530 int opcode = this->ideal_Opcode();
19531 int vlen = Matcher::vector_length(this, $src2);
19532 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19533 %}
19534 ins_pipe( pipe_slow );
19535 %}
19536
19537 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19538 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19539 match(Set dst (AddReductionVI src1 src2));
19540 match(Set dst (AndReductionV src1 src2));
19541 match(Set dst ( OrReductionV src1 src2));
19542 match(Set dst (XorReductionV src1 src2));
19543 match(Set dst (MinReductionV src1 src2));
19544 match(Set dst (MaxReductionV src1 src2));
19545 effect(TEMP vtmp1, TEMP vtmp2);
19546 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19547 ins_encode %{
19548 int opcode = this->ideal_Opcode();
19549 int vlen = Matcher::vector_length(this, $src2);
19550 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19551 %}
19552 ins_pipe( pipe_slow );
19553 %}
19554
19555 // =======================Short Reduction==========================================
19556
19557 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19558 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19559 match(Set dst (AddReductionVI src1 src2));
19560 match(Set dst (MulReductionVI src1 src2));
19561 match(Set dst (AndReductionV src1 src2));
19562 match(Set dst ( OrReductionV src1 src2));
19563 match(Set dst (XorReductionV src1 src2));
19564 match(Set dst (MinReductionV src1 src2));
19565 match(Set dst (MaxReductionV src1 src2));
19566 effect(TEMP vtmp1, TEMP vtmp2);
19567 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19568 ins_encode %{
19569 int opcode = this->ideal_Opcode();
19570 int vlen = Matcher::vector_length(this, $src2);
19571 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572 %}
19573 ins_pipe( pipe_slow );
19574 %}
19575
19576 // =======================Mul Reduction==========================================
19577
19578 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19579 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19580 Matcher::vector_length(n->in(2)) <= 32); // src2
19581 match(Set dst (MulReductionVI src1 src2));
19582 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19583 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19584 ins_encode %{
19585 int opcode = this->ideal_Opcode();
19586 int vlen = Matcher::vector_length(this, $src2);
19587 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19588 %}
19589 ins_pipe( pipe_slow );
19590 %}
19591
19592 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19593 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19594 Matcher::vector_length(n->in(2)) == 64); // src2
19595 match(Set dst (MulReductionVI src1 src2));
19596 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19597 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19598 ins_encode %{
19599 int opcode = this->ideal_Opcode();
19600 int vlen = Matcher::vector_length(this, $src2);
19601 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19602 %}
19603 ins_pipe( pipe_slow );
19604 %}
19605
//--------------------Min/Max Float Reduction --------------------
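// The immF src1 variants only match when the scalar input is the operation's
// identity (+Inf for min, -Inf for max), so it cannot perturb the result; the
// *_av variants below handle the general case where $dst carries a live value.
// reduceFloatMinMax must also preserve Java's NaN and signed-zero semantics,
// which is why the pre-AVX10.2 paths need the extra blend temporaries.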
19608 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19609 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19610 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19611 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19612 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19613 Matcher::vector_length(n->in(2)) == 2);
19614 match(Set dst (MinReductionV src1 src2));
19615 match(Set dst (MaxReductionV src1 src2));
19616 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19617 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19618 ins_encode %{
19619 assert(UseAVX > 0, "sanity");
19620
19621 int opcode = this->ideal_Opcode();
19622 int vlen = Matcher::vector_length(this, $src2);
19623 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19624 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19625 %}
19626 ins_pipe( pipe_slow );
19627 %}
19628
19629 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19630 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19631 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19632 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19633 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19634 Matcher::vector_length(n->in(2)) >= 4);
19635 match(Set dst (MinReductionV src1 src2));
19636 match(Set dst (MaxReductionV src1 src2));
19637 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19638 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19639 ins_encode %{
19640 assert(UseAVX > 0, "sanity");
19641
19642 int opcode = this->ideal_Opcode();
19643 int vlen = Matcher::vector_length(this, $src2);
19644 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19645 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19646 %}
19647 ins_pipe( pipe_slow );
19648 %}
19649
19650 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19651 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19652 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19653 Matcher::vector_length(n->in(2)) == 2);
19654 match(Set dst (MinReductionV dst src));
19655 match(Set dst (MaxReductionV dst src));
19656 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19657 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19658 ins_encode %{
19659 assert(UseAVX > 0, "sanity");
19660
19661 int opcode = this->ideal_Opcode();
19662 int vlen = Matcher::vector_length(this, $src);
19663 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19664 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19665 %}
19666 ins_pipe( pipe_slow );
19667 %}
19668
19669
19670 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19671 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19672 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19673 Matcher::vector_length(n->in(2)) >= 4);
19674 match(Set dst (MinReductionV dst src));
19675 match(Set dst (MaxReductionV dst src));
19676 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19677 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19678 ins_encode %{
19679 assert(UseAVX > 0, "sanity");
19680
19681 int opcode = this->ideal_Opcode();
19682 int vlen = Matcher::vector_length(this, $src);
19683 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19684 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19685 %}
19686 ins_pipe( pipe_slow );
19687 %}
19688
19689 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19690 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19691 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19692 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19693 Matcher::vector_length(n->in(2)) == 2);
19694 match(Set dst (MinReductionV src1 src2));
19695 match(Set dst (MaxReductionV src1 src2));
19696 effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19698 ins_encode %{
19699 int opcode = this->ideal_Opcode();
19700 int vlen = Matcher::vector_length(this, $src2);
19701 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19702 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19703 %}
19704 ins_pipe( pipe_slow );
19705 %}
19706
19707 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19708 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19709 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19710 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19711 Matcher::vector_length(n->in(2)) >= 4);
19712 match(Set dst (MinReductionV src1 src2));
19713 match(Set dst (MaxReductionV src1 src2));
19714 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19716 ins_encode %{
19717 int opcode = this->ideal_Opcode();
19718 int vlen = Matcher::vector_length(this, $src2);
19719 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19720 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19721 %}
19722 ins_pipe( pipe_slow );
19723 %}
19724
19725 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19726 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19727 Matcher::vector_length(n->in(2)) == 2);
19728 match(Set dst (MinReductionV dst src));
19729 match(Set dst (MaxReductionV dst src));
19730 effect(TEMP dst, TEMP xtmp1);
19731 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19732 ins_encode %{
19733 int opcode = this->ideal_Opcode();
19734 int vlen = Matcher::vector_length(this, $src);
19735 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19736 $xtmp1$$XMMRegister);
19737 %}
19738 ins_pipe( pipe_slow );
19739 %}
19740
19741 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19742 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19743 Matcher::vector_length(n->in(2)) >= 4);
19744 match(Set dst (MinReductionV dst src));
19745 match(Set dst (MaxReductionV dst src));
19746 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19748 ins_encode %{
19749 int opcode = this->ideal_Opcode();
19750 int vlen = Matcher::vector_length(this, $src);
19751 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19752 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19753 %}
19754 ins_pipe( pipe_slow );
19755 %}
19756
//--------------------Min/Max Double Reduction --------------------
19758 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19759 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19760 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19761 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19762 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19763 Matcher::vector_length(n->in(2)) == 2);
19764 match(Set dst (MinReductionV src1 src2));
19765 match(Set dst (MaxReductionV src1 src2));
19766 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19767 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19768 ins_encode %{
19769 assert(UseAVX > 0, "sanity");
19770
19771 int opcode = this->ideal_Opcode();
19772 int vlen = Matcher::vector_length(this, $src2);
19773 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19774 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19775 %}
19776 ins_pipe( pipe_slow );
19777 %}
19778
19779 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19780 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19781 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19782 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19783 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19784 Matcher::vector_length(n->in(2)) >= 4);
19785 match(Set dst (MinReductionV src1 src2));
19786 match(Set dst (MaxReductionV src1 src2));
19787 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19788 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19789 ins_encode %{
19790 assert(UseAVX > 0, "sanity");
19791
19792 int opcode = this->ideal_Opcode();
19793 int vlen = Matcher::vector_length(this, $src2);
19794 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19795 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19796 %}
19797 ins_pipe( pipe_slow );
19798 %}
19799
19800
19801 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19802 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19803 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19804 Matcher::vector_length(n->in(2)) == 2);
19805 match(Set dst (MinReductionV dst src));
19806 match(Set dst (MaxReductionV dst src));
19807 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19808 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19809 ins_encode %{
19810 assert(UseAVX > 0, "sanity");
19811
19812 int opcode = this->ideal_Opcode();
19813 int vlen = Matcher::vector_length(this, $src);
19814 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19815 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19816 %}
19817 ins_pipe( pipe_slow );
19818 %}
19819
19820 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19821 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19822 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19823 Matcher::vector_length(n->in(2)) >= 4);
19824 match(Set dst (MinReductionV dst src));
19825 match(Set dst (MaxReductionV dst src));
19826 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19827 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19828 ins_encode %{
19829 assert(UseAVX > 0, "sanity");
19830
19831 int opcode = this->ideal_Opcode();
19832 int vlen = Matcher::vector_length(this, $src);
19833 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19834 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19835 %}
19836 ins_pipe( pipe_slow );
19837 %}
19838
19839 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19840 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19841 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19842 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19843 Matcher::vector_length(n->in(2)) == 2);
19844 match(Set dst (MinReductionV src1 src2));
19845 match(Set dst (MaxReductionV src1 src2));
19846 effect(TEMP dst, TEMP xtmp1);
19847 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19848 ins_encode %{
19849 int opcode = this->ideal_Opcode();
19850 int vlen = Matcher::vector_length(this, $src2);
19851 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19852 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19853 %}
19854 ins_pipe( pipe_slow );
19855 %}
19856
19857 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19858 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19859 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19860 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19861 Matcher::vector_length(n->in(2)) >= 4);
19862 match(Set dst (MinReductionV src1 src2));
19863 match(Set dst (MaxReductionV src1 src2));
19864 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19865 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19866 ins_encode %{
19867 int opcode = this->ideal_Opcode();
19868 int vlen = Matcher::vector_length(this, $src2);
19869 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19870 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19871 %}
19872 ins_pipe( pipe_slow );
19873 %}
19874
19876 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19877 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19878 Matcher::vector_length(n->in(2)) == 2);
19879 match(Set dst (MinReductionV dst src));
19880 match(Set dst (MaxReductionV dst src));
19881 effect(TEMP dst, TEMP xtmp1);
19882 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19883 ins_encode %{
19884 int opcode = this->ideal_Opcode();
19885 int vlen = Matcher::vector_length(this, $src);
19886 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19887 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19888 %}
19889 ins_pipe( pipe_slow );
19890 %}
19891
19892 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19893 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19894 Matcher::vector_length(n->in(2)) >= 4);
19895 match(Set dst (MinReductionV dst src));
19896 match(Set dst (MaxReductionV dst src));
19897 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19898 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19899 ins_encode %{
19900 int opcode = this->ideal_Opcode();
19901 int vlen = Matcher::vector_length(this, $src);
19902 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19903 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19904 %}
19905 ins_pipe( pipe_slow );
19906 %}
19907
19908 // ====================VECTOR ARITHMETIC=======================================
19909
19910 // --------------------------------- ADD --------------------------------------
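// Each operation below follows the same three-rule pattern: a destructive
// two-operand SSE rule (UseAVX == 0), a three-operand AVX register rule, and
// an AVX rule that folds the second operand from memory. The memory form is
// guarded to vectors wider than 8 bytes so that a full-width memory operand
// is never folded for a narrower vector, which would read past the bytes the
// LoadVector would have fetched.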
19911
19912 // Bytes vector add
19913 instruct vaddB(vec dst, vec src) %{
19914 predicate(UseAVX == 0);
19915 match(Set dst (AddVB dst src));
19916 format %{ "paddb $dst,$src\t! add packedB" %}
19917 ins_encode %{
19918 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19919 %}
19920 ins_pipe( pipe_slow );
19921 %}
19922
19923 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19924 predicate(UseAVX > 0);
19925 match(Set dst (AddVB src1 src2));
19926 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19927 ins_encode %{
19928 int vlen_enc = vector_length_encoding(this);
19929 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19930 %}
19931 ins_pipe( pipe_slow );
19932 %}
19933
19934 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19935 predicate((UseAVX > 0) &&
19936 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19937 match(Set dst (AddVB src (LoadVector mem)));
19938 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19939 ins_encode %{
19940 int vlen_enc = vector_length_encoding(this);
19941 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19942 %}
19943 ins_pipe( pipe_slow );
19944 %}
19945
19946 // Shorts/Chars vector add
19947 instruct vaddS(vec dst, vec src) %{
19948 predicate(UseAVX == 0);
19949 match(Set dst (AddVS dst src));
19950 format %{ "paddw $dst,$src\t! add packedS" %}
19951 ins_encode %{
19952 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19953 %}
19954 ins_pipe( pipe_slow );
19955 %}
19956
19957 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19958 predicate(UseAVX > 0);
19959 match(Set dst (AddVS src1 src2));
19960 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19961 ins_encode %{
19962 int vlen_enc = vector_length_encoding(this);
19963 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19964 %}
19965 ins_pipe( pipe_slow );
19966 %}
19967
19968 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19969 predicate((UseAVX > 0) &&
19970 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19971 match(Set dst (AddVS src (LoadVector mem)));
19972 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19973 ins_encode %{
19974 int vlen_enc = vector_length_encoding(this);
19975 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19976 %}
19977 ins_pipe( pipe_slow );
19978 %}
19979
19980 // Integers vector add
19981 instruct vaddI(vec dst, vec src) %{
19982 predicate(UseAVX == 0);
19983 match(Set dst (AddVI dst src));
19984 format %{ "paddd $dst,$src\t! add packedI" %}
19985 ins_encode %{
19986 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19987 %}
19988 ins_pipe( pipe_slow );
19989 %}
19990
19991 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19992 predicate(UseAVX > 0);
19993 match(Set dst (AddVI src1 src2));
19994 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19995 ins_encode %{
19996 int vlen_enc = vector_length_encoding(this);
19997 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19998 %}
19999 ins_pipe( pipe_slow );
20000 %}
20001
20003 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20004 predicate((UseAVX > 0) &&
20005 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20006 match(Set dst (AddVI src (LoadVector mem)));
20007 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20008 ins_encode %{
20009 int vlen_enc = vector_length_encoding(this);
20010 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20011 %}
20012 ins_pipe( pipe_slow );
20013 %}
20014
20015 // Longs vector add
20016 instruct vaddL(vec dst, vec src) %{
20017 predicate(UseAVX == 0);
20018 match(Set dst (AddVL dst src));
20019 format %{ "paddq $dst,$src\t! add packedL" %}
20020 ins_encode %{
20021 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20022 %}
20023 ins_pipe( pipe_slow );
20024 %}
20025
20026 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20027 predicate(UseAVX > 0);
20028 match(Set dst (AddVL src1 src2));
20029 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20030 ins_encode %{
20031 int vlen_enc = vector_length_encoding(this);
20032 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20033 %}
20034 ins_pipe( pipe_slow );
20035 %}
20036
20037 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20038 predicate((UseAVX > 0) &&
20039 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20040 match(Set dst (AddVL src (LoadVector mem)));
20041 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20042 ins_encode %{
20043 int vlen_enc = vector_length_encoding(this);
20044 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20045 %}
20046 ins_pipe( pipe_slow );
20047 %}
20048
20049 // Floats vector add
20050 instruct vaddF(vec dst, vec src) %{
20051 predicate(UseAVX == 0);
20052 match(Set dst (AddVF dst src));
20053 format %{ "addps $dst,$src\t! add packedF" %}
20054 ins_encode %{
20055 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20056 %}
20057 ins_pipe( pipe_slow );
20058 %}
20059
20060 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20061 predicate(UseAVX > 0);
20062 match(Set dst (AddVF src1 src2));
20063 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20064 ins_encode %{
20065 int vlen_enc = vector_length_encoding(this);
20066 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20067 %}
20068 ins_pipe( pipe_slow );
20069 %}
20070
20071 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20072 predicate((UseAVX > 0) &&
20073 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20074 match(Set dst (AddVF src (LoadVector mem)));
20075 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20076 ins_encode %{
20077 int vlen_enc = vector_length_encoding(this);
20078 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20079 %}
20080 ins_pipe( pipe_slow );
20081 %}
20082
20083 // Doubles vector add
20084 instruct vaddD(vec dst, vec src) %{
20085 predicate(UseAVX == 0);
20086 match(Set dst (AddVD dst src));
20087 format %{ "addpd $dst,$src\t! add packedD" %}
20088 ins_encode %{
20089 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20090 %}
20091 ins_pipe( pipe_slow );
20092 %}
20093
20094 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20095 predicate(UseAVX > 0);
20096 match(Set dst (AddVD src1 src2));
20097 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20098 ins_encode %{
20099 int vlen_enc = vector_length_encoding(this);
20100 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20101 %}
20102 ins_pipe( pipe_slow );
20103 %}
20104
20105 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20106 predicate((UseAVX > 0) &&
20107 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20108 match(Set dst (AddVD src (LoadVector mem)));
20109 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20110 ins_encode %{
20111 int vlen_enc = vector_length_encoding(this);
20112 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20113 %}
20114 ins_pipe( pipe_slow );
20115 %}
20116
20117 // --------------------------------- SUB --------------------------------------
20118
20119 // Bytes vector sub
20120 instruct vsubB(vec dst, vec src) %{
20121 predicate(UseAVX == 0);
20122 match(Set dst (SubVB dst src));
20123 format %{ "psubb $dst,$src\t! sub packedB" %}
20124 ins_encode %{
20125 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20126 %}
20127 ins_pipe( pipe_slow );
20128 %}
20129
20130 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20131 predicate(UseAVX > 0);
20132 match(Set dst (SubVB src1 src2));
20133 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20134 ins_encode %{
20135 int vlen_enc = vector_length_encoding(this);
20136 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20137 %}
20138 ins_pipe( pipe_slow );
20139 %}
20140
20141 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20142 predicate((UseAVX > 0) &&
20143 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20144 match(Set dst (SubVB src (LoadVector mem)));
20145 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20146 ins_encode %{
20147 int vlen_enc = vector_length_encoding(this);
20148 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20149 %}
20150 ins_pipe( pipe_slow );
20151 %}
20152
20153 // Shorts/Chars vector sub
20154 instruct vsubS(vec dst, vec src) %{
20155 predicate(UseAVX == 0);
20156 match(Set dst (SubVS dst src));
20157 format %{ "psubw $dst,$src\t! sub packedS" %}
20158 ins_encode %{
20159 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20160 %}
20161 ins_pipe( pipe_slow );
20162 %}
20163
20165 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20166 predicate(UseAVX > 0);
20167 match(Set dst (SubVS src1 src2));
20168 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20169 ins_encode %{
20170 int vlen_enc = vector_length_encoding(this);
20171 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20172 %}
20173 ins_pipe( pipe_slow );
20174 %}
20175
20176 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20177 predicate((UseAVX > 0) &&
20178 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20179 match(Set dst (SubVS src (LoadVector mem)));
20180 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20181 ins_encode %{
20182 int vlen_enc = vector_length_encoding(this);
20183 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20184 %}
20185 ins_pipe( pipe_slow );
20186 %}
20187
20188 // Integers vector sub
20189 instruct vsubI(vec dst, vec src) %{
20190 predicate(UseAVX == 0);
20191 match(Set dst (SubVI dst src));
20192 format %{ "psubd $dst,$src\t! sub packedI" %}
20193 ins_encode %{
20194 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20195 %}
20196 ins_pipe( pipe_slow );
20197 %}
20198
20199 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20200 predicate(UseAVX > 0);
20201 match(Set dst (SubVI src1 src2));
20202 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20203 ins_encode %{
20204 int vlen_enc = vector_length_encoding(this);
20205 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20206 %}
20207 ins_pipe( pipe_slow );
20208 %}
20209
20210 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20211 predicate((UseAVX > 0) &&
20212 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20213 match(Set dst (SubVI src (LoadVector mem)));
20214 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20215 ins_encode %{
20216 int vlen_enc = vector_length_encoding(this);
20217 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20218 %}
20219 ins_pipe( pipe_slow );
20220 %}
20221
20222 // Longs vector sub
20223 instruct vsubL(vec dst, vec src) %{
20224 predicate(UseAVX == 0);
20225 match(Set dst (SubVL dst src));
20226 format %{ "psubq $dst,$src\t! sub packedL" %}
20227 ins_encode %{
20228 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20229 %}
20230 ins_pipe( pipe_slow );
20231 %}
20232
20233 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20234 predicate(UseAVX > 0);
20235 match(Set dst (SubVL src1 src2));
20236 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20237 ins_encode %{
20238 int vlen_enc = vector_length_encoding(this);
20239 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20240 %}
20241 ins_pipe( pipe_slow );
20242 %}
20243
20245 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20246 predicate((UseAVX > 0) &&
20247 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20248 match(Set dst (SubVL src (LoadVector mem)));
20249 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20250 ins_encode %{
20251 int vlen_enc = vector_length_encoding(this);
20252 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20253 %}
20254 ins_pipe( pipe_slow );
20255 %}
20256
20257 // Floats vector sub
20258 instruct vsubF(vec dst, vec src) %{
20259 predicate(UseAVX == 0);
20260 match(Set dst (SubVF dst src));
20261 format %{ "subps $dst,$src\t! sub packedF" %}
20262 ins_encode %{
20263 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20264 %}
20265 ins_pipe( pipe_slow );
20266 %}
20267
20268 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20269 predicate(UseAVX > 0);
20270 match(Set dst (SubVF src1 src2));
20271 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20272 ins_encode %{
20273 int vlen_enc = vector_length_encoding(this);
20274 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20275 %}
20276 ins_pipe( pipe_slow );
20277 %}
20278
20279 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20280 predicate((UseAVX > 0) &&
20281 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20282 match(Set dst (SubVF src (LoadVector mem)));
20283 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20284 ins_encode %{
20285 int vlen_enc = vector_length_encoding(this);
20286 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20287 %}
20288 ins_pipe( pipe_slow );
20289 %}
20290
20291 // Doubles vector sub
20292 instruct vsubD(vec dst, vec src) %{
20293 predicate(UseAVX == 0);
20294 match(Set dst (SubVD dst src));
20295 format %{ "subpd $dst,$src\t! sub packedD" %}
20296 ins_encode %{
20297 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20298 %}
20299 ins_pipe( pipe_slow );
20300 %}
20301
20302 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20303 predicate(UseAVX > 0);
20304 match(Set dst (SubVD src1 src2));
20305 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20306 ins_encode %{
20307 int vlen_enc = vector_length_encoding(this);
20308 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20309 %}
20310 ins_pipe( pipe_slow );
20311 %}
20312
20313 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20314 predicate((UseAVX > 0) &&
20315 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20316 match(Set dst (SubVD src (LoadVector mem)));
20317 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20318 ins_encode %{
20319 int vlen_enc = vector_length_encoding(this);
20320 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20321 %}
20322 ins_pipe( pipe_slow );
20323 %}
20324
20325 // --------------------------------- MUL --------------------------------------
20326
20327 // Byte vector mul
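// There is no byte-wide packed multiply in SSE/AVX, so byte vectors are
// widened to shorts, multiplied with (v)pmullw, and the low byte of each
// 16-bit product is packed back down.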
20328 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20329 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20330 match(Set dst (MulVB src1 src2));
20331 effect(TEMP dst, TEMP xtmp);
20332 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20333 ins_encode %{
20334 assert(UseSSE > 3, "required");
20335 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20336 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20337 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20338 __ psllw($dst$$XMMRegister, 8);
20339 __ psrlw($dst$$XMMRegister, 8);
20340 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20341 %}
20342 ins_pipe( pipe_slow );
20343 %}
20344
20345 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20346 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20347 match(Set dst (MulVB src1 src2));
20348 effect(TEMP dst, TEMP xtmp);
20349 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20350 ins_encode %{
20351 assert(UseSSE > 3, "required");
20352 // Odd-index elements
20353 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20354 __ psrlw($dst$$XMMRegister, 8);
20355 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20356 __ psrlw($xtmp$$XMMRegister, 8);
20357 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20358 __ psllw($dst$$XMMRegister, 8);
20359 // Even-index elements
20360 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20361 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20362 __ psllw($xtmp$$XMMRegister, 8);
20363 __ psrlw($xtmp$$XMMRegister, 8);
20364 // Combine
20365 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20366 %}
20367 ins_pipe( pipe_slow );
20368 %}
20369
20370 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20371 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20372 match(Set dst (MulVB src1 src2));
20373 effect(TEMP xtmp1, TEMP xtmp2);
20374 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20375 ins_encode %{
20376 int vlen_enc = vector_length_encoding(this);
20377 // Odd-index elements
20378 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20379 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20380 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20381 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20382 // Even-index elements
20383 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20384 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20385 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20386 // Combine
20387 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20388 %}
20389 ins_pipe( pipe_slow );
20390 %}
20391
20392 // Shorts/Chars vector mul
20393 instruct vmulS(vec dst, vec src) %{
20394 predicate(UseAVX == 0);
20395 match(Set dst (MulVS dst src));
20396 format %{ "pmullw $dst,$src\t! mul packedS" %}
20397 ins_encode %{
20398 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20399 %}
20400 ins_pipe( pipe_slow );
20401 %}
20402
20403 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20404 predicate(UseAVX > 0);
20405 match(Set dst (MulVS src1 src2));
20406 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20407 ins_encode %{
20408 int vlen_enc = vector_length_encoding(this);
20409 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20410 %}
20411 ins_pipe( pipe_slow );
20412 %}
20413
20414 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20415 predicate((UseAVX > 0) &&
20416 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20417 match(Set dst (MulVS src (LoadVector mem)));
20418 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20419 ins_encode %{
20420 int vlen_enc = vector_length_encoding(this);
20421 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20422 %}
20423 ins_pipe( pipe_slow );
20424 %}
20425
20426 // Integers vector mul
20427 instruct vmulI(vec dst, vec src) %{
20428 predicate(UseAVX == 0);
20429 match(Set dst (MulVI dst src));
20430 format %{ "pmulld $dst,$src\t! mul packedI" %}
20431 ins_encode %{
20432 assert(UseSSE > 3, "required");
20433 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20434 %}
20435 ins_pipe( pipe_slow );
20436 %}
20437
20438 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20439 predicate(UseAVX > 0);
20440 match(Set dst (MulVI src1 src2));
20441 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20442 ins_encode %{
20443 int vlen_enc = vector_length_encoding(this);
20444 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20445 %}
20446 ins_pipe( pipe_slow );
20447 %}
20448
20449 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20450 predicate((UseAVX > 0) &&
20451 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20452 match(Set dst (MulVI src (LoadVector mem)));
20453 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20454 ins_encode %{
20455 int vlen_enc = vector_length_encoding(this);
20456 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20457 %}
20458 ins_pipe( pipe_slow );
20459 %}
20460
20461 // Longs vector mul
20462 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20463 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20464 VM_Version::supports_avx512dq()) ||
20465 VM_Version::supports_avx512vldq());
20466 match(Set dst (MulVL src1 src2));
20467 ins_cost(500);
20468 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20469 ins_encode %{
20470 assert(UseAVX > 2, "required");
20471 int vlen_enc = vector_length_encoding(this);
20472 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20473 %}
20474 ins_pipe( pipe_slow );
20475 %}
20476
20477 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20478 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20479 VM_Version::supports_avx512dq()) ||
20480 (Matcher::vector_length_in_bytes(n) > 8 &&
20481 VM_Version::supports_avx512vldq()));
20482 match(Set dst (MulVL src (LoadVector mem)));
20483 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20484 ins_cost(500);
20485 ins_encode %{
20486 assert(UseAVX > 2, "required");
20487 int vlen_enc = vector_length_encoding(this);
20488 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20489 %}
20490 ins_pipe( pipe_slow );
20491 %}
20492
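// Below AVX-512DQ there is no packed 64x64-bit multiply, so the low 64 bits
// of each product are assembled from 32-bit halves: writing
// a = (a_hi << 32) + a_lo and b = (b_hi << 32) + b_lo,
//   (a * b) mod 2^64 == ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo
// because the a_hi * b_hi term is shifted entirely out of the low 64 bits.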
20493 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20494 predicate(UseAVX == 0);
20495 match(Set dst (MulVL src1 src2));
20496 ins_cost(500);
20497 effect(TEMP dst, TEMP xtmp);
20498 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20499 ins_encode %{
20500 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20502 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20503 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20504 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20505 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20506 __ psllq($dst$$XMMRegister, 32);
20507 // Get the lo-lo products
20508 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20509 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20510 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20511 %}
20512 ins_pipe( pipe_slow );
20513 %}
20514
20515 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20516 predicate(UseAVX > 0 &&
20517 ((Matcher::vector_length_in_bytes(n) == 64 &&
20518 !VM_Version::supports_avx512dq()) ||
20519 (Matcher::vector_length_in_bytes(n) < 64 &&
20520 !VM_Version::supports_avx512vldq())));
20521 match(Set dst (MulVL src1 src2));
20522 effect(TEMP xtmp1, TEMP xtmp2);
20523 ins_cost(500);
20524 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20525 ins_encode %{
20526 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20528 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20529 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20530 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20531 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20532 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20533 // Get the lo-lo products
20534 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20536 %}
20537 ins_pipe( pipe_slow );
20538 %}
20539
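// When both inputs are known to be zero-extended (or, below, sign-extended)
// 32-bit values, a single vpmuludq/vpmuldq already yields the full 64-bit
// product, hence the lower cost than the general sequences above.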
20540 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20541 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20542 match(Set dst (MulVL src1 src2));
20543 ins_cost(100);
20544 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20545 ins_encode %{
20546 int vlen_enc = vector_length_encoding(this);
20547 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20548 %}
20549 ins_pipe( pipe_slow );
20550 %}
20551
20552 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20553 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20554 match(Set dst (MulVL src1 src2));
20555 ins_cost(100);
20556 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20557 ins_encode %{
20558 int vlen_enc = vector_length_encoding(this);
20559 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20560 %}
20561 ins_pipe( pipe_slow );
20562 %}
20563
20564 // Floats vector mul
20565 instruct vmulF(vec dst, vec src) %{
20566 predicate(UseAVX == 0);
20567 match(Set dst (MulVF dst src));
20568 format %{ "mulps $dst,$src\t! mul packedF" %}
20569 ins_encode %{
20570 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20571 %}
20572 ins_pipe( pipe_slow );
20573 %}
20574
20575 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20576 predicate(UseAVX > 0);
20577 match(Set dst (MulVF src1 src2));
20578 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20579 ins_encode %{
20580 int vlen_enc = vector_length_encoding(this);
20581 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20582 %}
20583 ins_pipe( pipe_slow );
20584 %}
20585
20586 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20587 predicate((UseAVX > 0) &&
20588 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20589 match(Set dst (MulVF src (LoadVector mem)));
20590 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20591 ins_encode %{
20592 int vlen_enc = vector_length_encoding(this);
20593 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20594 %}
20595 ins_pipe( pipe_slow );
20596 %}
20597
20598 // Doubles vector mul
20599 instruct vmulD(vec dst, vec src) %{
20600 predicate(UseAVX == 0);
20601 match(Set dst (MulVD dst src));
20602 format %{ "mulpd $dst,$src\t! mul packedD" %}
20603 ins_encode %{
20604 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20605 %}
20606 ins_pipe( pipe_slow );
20607 %}
20608
20609 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20610 predicate(UseAVX > 0);
20611 match(Set dst (MulVD src1 src2));
20612 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20613 ins_encode %{
20614 int vlen_enc = vector_length_encoding(this);
20615 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20616 %}
20617 ins_pipe( pipe_slow );
20618 %}
20619
20620 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20621 predicate((UseAVX > 0) &&
20622 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20623 match(Set dst (MulVD src (LoadVector mem)));
20624 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20625 ins_encode %{
20626 int vlen_enc = vector_length_encoding(this);
20627 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20628 %}
20629 ins_pipe( pipe_slow );
20630 %}
20631
20632 // --------------------------------- DIV --------------------------------------
20633
20634 // Floats vector div
20635 instruct vdivF(vec dst, vec src) %{
20636 predicate(UseAVX == 0);
20637 match(Set dst (DivVF dst src));
20638 format %{ "divps $dst,$src\t! div packedF" %}
20639 ins_encode %{
20640 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20641 %}
20642 ins_pipe( pipe_slow );
20643 %}
20644
20645 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20646 predicate(UseAVX > 0);
20647 match(Set dst (DivVF src1 src2));
20648 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20649 ins_encode %{
20650 int vlen_enc = vector_length_encoding(this);
20651 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20652 %}
20653 ins_pipe( pipe_slow );
20654 %}
20655
20656 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20657 predicate((UseAVX > 0) &&
20658 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20659 match(Set dst (DivVF src (LoadVector mem)));
20660 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20661 ins_encode %{
20662 int vlen_enc = vector_length_encoding(this);
20663 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20664 %}
20665 ins_pipe( pipe_slow );
20666 %}
20667
20668 // Doubles vector div
20669 instruct vdivD(vec dst, vec src) %{
20670 predicate(UseAVX == 0);
20671 match(Set dst (DivVD dst src));
20672 format %{ "divpd $dst,$src\t! div packedD" %}
20673 ins_encode %{
20674 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20675 %}
20676 ins_pipe( pipe_slow );
20677 %}
20678
20679 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20680 predicate(UseAVX > 0);
20681 match(Set dst (DivVD src1 src2));
20682 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20683 ins_encode %{
20684 int vlen_enc = vector_length_encoding(this);
20685 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20686 %}
20687 ins_pipe( pipe_slow );
20688 %}
20689
20690 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20691 predicate((UseAVX > 0) &&
20692 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20693 match(Set dst (DivVD src (LoadVector mem)));
20694 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20695 ins_encode %{
20696 int vlen_enc = vector_length_encoding(this);
20697 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20698 %}
20699 ins_pipe( pipe_slow );
20700 %}
20701
20702 // ------------------------------ MinMax ---------------------------------------
20703
20704 // Byte, Short, Int vector Min/Max
20705 instruct minmax_reg_sse(vec dst, vec src) %{
20706 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20707 UseAVX == 0);
20708 match(Set dst (MinV dst src));
20709 match(Set dst (MaxV dst src));
20710 format %{ "vector_minmax $dst,$src\t! " %}
20711 ins_encode %{
20712 assert(UseSSE >= 4, "required");
20713
20714 int opcode = this->ideal_Opcode();
20715 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20716 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20717 %}
20718 ins_pipe( pipe_slow );
20719 %}
20720
20721 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20722 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20723 UseAVX > 0);
20724 match(Set dst (MinV src1 src2));
20725 match(Set dst (MaxV src1 src2));
20726 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20727 ins_encode %{
20728 int opcode = this->ideal_Opcode();
20729 int vlen_enc = vector_length_encoding(this);
20730 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20731
20732 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20733 %}
20734 ins_pipe( pipe_slow );
20735 %}
20736
20737 // Long vector Min/Max
20738 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20739 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20740 UseAVX == 0);
20741 match(Set dst (MinV dst src));
20742 match(Set dst (MaxV src dst));
20743 effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t! using $tmp as TEMP" %}
20745 ins_encode %{
20746 assert(UseSSE >= 4, "required");
20747
20748 int opcode = this->ideal_Opcode();
20749 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20750 assert(elem_bt == T_LONG, "sanity");
20751
20752 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20753 %}
20754 ins_pipe( pipe_slow );
20755 %}
20756
20757 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20758 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20759 UseAVX > 0 && !VM_Version::supports_avx512vl());
20760 match(Set dst (MinV src1 src2));
20761 match(Set dst (MaxV src1 src2));
20762 effect(TEMP dst);
20763 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20764 ins_encode %{
20765 int vlen_enc = vector_length_encoding(this);
20766 int opcode = this->ideal_Opcode();
20767 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20768 assert(elem_bt == T_LONG, "sanity");
20769
20770 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20771 %}
20772 ins_pipe( pipe_slow );
20773 %}
20774
20775 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20776 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20777 Matcher::vector_element_basic_type(n) == T_LONG);
20778 match(Set dst (MinV src1 src2));
20779 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20781 ins_encode %{
20782 assert(UseAVX > 2, "required");
20783
20784 int vlen_enc = vector_length_encoding(this);
20785 int opcode = this->ideal_Opcode();
20786 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20787 assert(elem_bt == T_LONG, "sanity");
20788
20789 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20790 %}
20791 ins_pipe( pipe_slow );
20792 %}
20793
20794 // Float/Double vector Min/Max
20795 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20796 predicate(VM_Version::supports_avx10_2() &&
20797 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20798 match(Set dst (MinV a b));
20799 match(Set dst (MaxV a b));
20800 format %{ "vector_minmaxFP $dst, $a, $b" %}
20801 ins_encode %{
20802 int vlen_enc = vector_length_encoding(this);
20803 int opcode = this->ideal_Opcode();
20804 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20805 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20806 %}
20807 ins_pipe( pipe_slow );
20808 %}
20809
20810 // Float/Double vector Min/Max
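// Java's Math.min/max must return NaN when either input is NaN and must
// treat -0.0 as smaller than +0.0, whereas x86 (v)min/max return the second
// source when either input is NaN and pick an operand-order-dependent result
// for (-0.0, +0.0); the rules below therefore blend the operands through
// temporaries to recover the Java semantics.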
20811 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20812 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20813 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20814 UseAVX > 0);
20815 match(Set dst (MinV a b));
20816 match(Set dst (MaxV a b));
20817 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t! using $tmp, $atmp, $btmp as TEMP" %}
20819 ins_encode %{
20820 assert(UseAVX > 0, "required");
20821
20822 int opcode = this->ideal_Opcode();
20823 int vlen_enc = vector_length_encoding(this);
20824 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20825
20826 __ vminmax_fp(opcode, elem_bt,
20827 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20829 %}
20830 ins_pipe( pipe_slow );
20831 %}
20832
20833 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20834 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20835 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20836 match(Set dst (MinV a b));
20837 match(Set dst (MaxV a b));
20838 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t! using $atmp, $btmp as TEMP" %}
20840 ins_encode %{
20841 assert(UseAVX > 2, "required");
20842
20843 int opcode = this->ideal_Opcode();
20844 int vlen_enc = vector_length_encoding(this);
20845 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20846
20847 __ evminmax_fp(opcode, elem_bt,
20848 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20850 %}
20851 ins_pipe( pipe_slow );
20852 %}
20853
20854 // ------------------------------ Unsigned vector Min/Max ----------------------
20855
20856 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20857 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20858 match(Set dst (UMinV a b));
20859 match(Set dst (UMaxV a b));
20860 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20861 ins_encode %{
20862 int opcode = this->ideal_Opcode();
20863 int vlen_enc = vector_length_encoding(this);
20864 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20865 assert(is_integral_type(elem_bt), "");
20866 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20867 %}
20868 ins_pipe( pipe_slow );
20869 %}
20870
20871 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20872 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20873 match(Set dst (UMinV a (LoadVector b)));
20874 match(Set dst (UMaxV a (LoadVector b)));
20875 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20876 ins_encode %{
20877 int opcode = this->ideal_Opcode();
20878 int vlen_enc = vector_length_encoding(this);
20879 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20880 assert(is_integral_type(elem_bt), "");
20881 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20882 %}
20883 ins_pipe( pipe_slow );
20884 %}
20885
20886 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20887 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20888 match(Set dst (UMinV a b));
20889 match(Set dst (UMaxV a b));
20890 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20892 ins_encode %{
20893 int opcode = this->ideal_Opcode();
20894 int vlen_enc = vector_length_encoding(this);
20895 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20896 %}
20897 ins_pipe( pipe_slow );
20898 %}
20899
20900 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20901 match(Set dst (UMinV (Binary dst src2) mask));
20902 match(Set dst (UMaxV (Binary dst src2) mask));
20903 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20904 ins_encode %{
20905 int vlen_enc = vector_length_encoding(this);
20906 BasicType bt = Matcher::vector_element_basic_type(this);
20907 int opc = this->ideal_Opcode();
20908 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20909 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20910 %}
20911 ins_pipe( pipe_slow );
20912 %}
20913
20914 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20915 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20916 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20917 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20918 ins_encode %{
20919 int vlen_enc = vector_length_encoding(this);
20920 BasicType bt = Matcher::vector_element_basic_type(this);
20921 int opc = this->ideal_Opcode();
20922 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20923 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20924 %}
20925 ins_pipe( pipe_slow );
20926 %}
20927
20928 // --------------------------------- Signum/CopySign ---------------------------
20929
20930 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20931 match(Set dst (SignumF dst (Binary zero one)));
20932 effect(KILL cr);
20933 format %{ "signumF $dst, $dst" %}
20934 ins_encode %{
20935 int opcode = this->ideal_Opcode();
20936 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20937 %}
20938 ins_pipe( pipe_slow );
20939 %}
20940
20941 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20942 match(Set dst (SignumD dst (Binary zero one)));
20943 effect(KILL cr);
20944 format %{ "signumD $dst, $dst" %}
20945 ins_encode %{
20946 int opcode = this->ideal_Opcode();
20947 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20948 %}
20949 ins_pipe( pipe_slow );
20950 %}
20951
20952 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20953 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20954 match(Set dst (SignumVF src (Binary zero one)));
20955 match(Set dst (SignumVD src (Binary zero one)));
20956 effect(TEMP dst, TEMP xtmp1);
20957 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20958 ins_encode %{
20959 int opcode = this->ideal_Opcode();
20960 int vec_enc = vector_length_encoding(this);
20961 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20962 $xtmp1$$XMMRegister, vec_enc);
20963 %}
20964 ins_pipe( pipe_slow );
20965 %}
20966
20967 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20968 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20969 match(Set dst (SignumVF src (Binary zero one)));
20970 match(Set dst (SignumVD src (Binary zero one)));
20971 effect(TEMP dst, TEMP ktmp1);
20972 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20973 ins_encode %{
20974 int opcode = this->ideal_Opcode();
20975 int vec_enc = vector_length_encoding(this);
20976 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20977 $ktmp1$$KRegister, vec_enc);
20978 %}
20979 ins_pipe( pipe_slow );
20980 %}
20981
20982 // ---------------------------------------
20983 // For copySign use 0xE4 as writemask for vpternlog
20984 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20985 // C (xmm2) is set to 0x7FFFFFFF
20986 // Wherever xmm2 is 0, we want to pick from B (sign)
20987 // Wherever xmm2 is 1, we want to pick from A (src)
20988 //
20989 // A B C Result
20990 // 0 0 0 0
20991 // 0 0 1 0
20992 // 0 1 0 1
20993 // 0 1 1 0
20994 // 1 0 0 0
20995 // 1 0 1 1
20996 // 1 1 0 1
20997 // 1 1 1 1
20998 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
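// (vpternlog evaluates each result bit as imm8[(A << 2) | (B << 1) | C],
// so the Result column above, read as a bit pattern, is the immediate.)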
21000 // ---------------------------------------
21001
21002 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21003 match(Set dst (CopySignF dst src));
21004 effect(TEMP tmp1, TEMP tmp2);
21005 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21006 ins_encode %{
21007 __ movl($tmp2$$Register, 0x7FFFFFFF);
21008 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21009 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21010 %}
21011 ins_pipe( pipe_slow );
21012 %}
21013
21014 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21015 match(Set dst (CopySignD dst (Binary src zero)));
21016 ins_cost(100);
21017 effect(TEMP tmp1, TEMP tmp2);
21018 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21019 ins_encode %{
21020 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21021 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21022 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21023 %}
21024 ins_pipe( pipe_slow );
21025 %}
21026
21027 //----------------------------- CompressBits/ExpandBits ------------------------
21028
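// BMI2 pext gathers the bits of src selected by mask into the contiguous
// low-order bits of dst; pdep is the inverse, scattering the low-order bits
// of src to the positions selected by mask. For example,
// pextl(dst, src, 0x0000FF00) leaves (src >> 8) & 0xFF in dst.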
21029 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21030 predicate(n->bottom_type()->isa_int());
21031 match(Set dst (CompressBits src mask));
21032 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21033 ins_encode %{
21034 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21035 %}
21036 ins_pipe( pipe_slow );
21037 %}
21038
21039 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21040 predicate(n->bottom_type()->isa_int());
21041 match(Set dst (ExpandBits src mask));
21042 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21043 ins_encode %{
21044 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21045 %}
21046 ins_pipe( pipe_slow );
21047 %}
21048
21049 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21050 predicate(n->bottom_type()->isa_int());
21051 match(Set dst (CompressBits src (LoadI mask)));
21052 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21053 ins_encode %{
21054 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21055 %}
21056 ins_pipe( pipe_slow );
21057 %}
21058
21059 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21060 predicate(n->bottom_type()->isa_int());
21061 match(Set dst (ExpandBits src (LoadI mask)));
21062 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21063 ins_encode %{
21064 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21065 %}
21066 ins_pipe( pipe_slow );
21067 %}
21068
21069 // --------------------------------- Sqrt --------------------------------------
21070
21071 instruct vsqrtF_reg(vec dst, vec src) %{
21072 match(Set dst (SqrtVF src));
21073 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21074 ins_encode %{
21075 assert(UseAVX > 0, "required");
21076 int vlen_enc = vector_length_encoding(this);
21077 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21078 %}
21079 ins_pipe( pipe_slow );
21080 %}
21081
21082 instruct vsqrtF_mem(vec dst, memory mem) %{
21083 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21084 match(Set dst (SqrtVF (LoadVector mem)));
21085 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21086 ins_encode %{
21087 assert(UseAVX > 0, "required");
21088 int vlen_enc = vector_length_encoding(this);
21089 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21090 %}
21091 ins_pipe( pipe_slow );
21092 %}
21093
21094 // Floating point vector sqrt
21095 instruct vsqrtD_reg(vec dst, vec src) %{
21096 match(Set dst (SqrtVD src));
21097 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21098 ins_encode %{
21099 assert(UseAVX > 0, "required");
21100 int vlen_enc = vector_length_encoding(this);
21101 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21102 %}
21103 ins_pipe( pipe_slow );
21104 %}
21105
21106 instruct vsqrtD_mem(vec dst, memory mem) %{
21107 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21108 match(Set dst (SqrtVD (LoadVector mem)));
21109 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21110 ins_encode %{
21111 assert(UseAVX > 0, "required");
21112 int vlen_enc = vector_length_encoding(this);
21113 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21114 %}
21115 ins_pipe( pipe_slow );
21116 %}
21117
21118 // ------------------------------ Shift ---------------------------------------
21119
21120 // Left and right shift count vectors are the same on x86
21121 // (only lowest bits of xmm reg are used for count).
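// The shift-by-register forms (psllw/pslld/psllq and their right-shift
// counterparts) all read the count from the low 64 bits of the xmm operand,
// so a single movdl-loaded register works for every direction and size.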
21122 instruct vshiftcnt(vec dst, rRegI cnt) %{
21123 match(Set dst (LShiftCntV cnt));
21124 match(Set dst (RShiftCntV cnt));
21125 format %{ "movdl $dst,$cnt\t! load shift count" %}
21126 ins_encode %{
21127 __ movdl($dst$$XMMRegister, $cnt$$Register);
21128 %}
21129 ins_pipe( pipe_slow );
21130 %}
21131
21132 // Byte vector shift
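// SSE/AVX have no byte-granular shifts, so the rules below sign- or
// zero-extend the bytes to shorts, use the 16-bit shift forms, mask each
// result word down to its low byte, and pack the words back into bytes.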
21133 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21134 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21135 match(Set dst ( LShiftVB src shift));
21136 match(Set dst ( RShiftVB src shift));
21137 match(Set dst (URShiftVB src shift));
21138 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21139 format %{"vector_byte_shift $dst,$src,$shift" %}
21140 ins_encode %{
21141 assert(UseSSE > 3, "required");
21142 int opcode = this->ideal_Opcode();
21143 bool sign = (opcode != Op_URShiftVB);
21144 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21145 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21146 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21147 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21148 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21149 %}
21150 ins_pipe( pipe_slow );
21151 %}
21152
21153 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21154 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21155 UseAVX <= 1);
21156 match(Set dst ( LShiftVB src shift));
21157 match(Set dst ( RShiftVB src shift));
21158 match(Set dst (URShiftVB src shift));
21159 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21160 format %{"vector_byte_shift $dst,$src,$shift" %}
21161 ins_encode %{
21162 assert(UseSSE > 3, "required");
21163 int opcode = this->ideal_Opcode();
21164 bool sign = (opcode != Op_URShiftVB);
21165 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21166 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21167 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21168 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21169 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21170 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21171 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21172 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21173 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21174 %}
21175 ins_pipe( pipe_slow );
21176 %}
21177
21178 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21179 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21180 UseAVX > 1);
21181 match(Set dst ( LShiftVB src shift));
21182 match(Set dst ( RShiftVB src shift));
21183 match(Set dst (URShiftVB src shift));
21184 effect(TEMP dst, TEMP tmp);
21185 format %{"vector_byte_shift $dst,$src,$shift" %}
21186 ins_encode %{
21187 int opcode = this->ideal_Opcode();
21188 bool sign = (opcode != Op_URShiftVB);
21189 int vlen_enc = Assembler::AVX_256bit;
21190 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21191 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21192 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21193 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21194 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21195 %}
21196 ins_pipe( pipe_slow );
21197 %}
21198
21199 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21200 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21201 match(Set dst ( LShiftVB src shift));
21202 match(Set dst ( RShiftVB src shift));
21203 match(Set dst (URShiftVB src shift));
21204 effect(TEMP dst, TEMP tmp);
21205 format %{"vector_byte_shift $dst,$src,$shift" %}
21206 ins_encode %{
21207 assert(UseAVX > 1, "required");
21208 int opcode = this->ideal_Opcode();
21209 bool sign = (opcode != Op_URShiftVB);
21210 int vlen_enc = Assembler::AVX_256bit;
21211 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21212 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21213 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21214 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21216 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21217 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21218 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21219 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21220 %}
21221 ins_pipe( pipe_slow );
21222 %}
21223
21224 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21225 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21226 match(Set dst ( LShiftVB src shift));
21227 match(Set dst (RShiftVB src shift));
21228 match(Set dst (URShiftVB src shift));
21229 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21230 format %{"vector_byte_shift $dst,$src,$shift" %}
21231 ins_encode %{
21232 assert(UseAVX > 2, "required");
21233 int opcode = this->ideal_Opcode();
21234 bool sign = (opcode != Op_URShiftVB);
21235 int vlen_enc = Assembler::AVX_512bit;
21236 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21237 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21238 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21239 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21240 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21241 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21242 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21243 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21244 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21245 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21246 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21247 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21248 %}
21249 ins_pipe( pipe_slow );
21250 %}
21251
// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before shifting. Char vectors are fine, since chars are
// unsigned values.
// Shorts/Chars vector shift
21257 instruct vshiftS(vec dst, vec src, vec shift) %{
21258 predicate(!n->as_ShiftV()->is_var_shift());
21259 match(Set dst ( LShiftVS src shift));
21260 match(Set dst ( RShiftVS src shift));
21261 match(Set dst (URShiftVS src shift));
21262 effect(TEMP dst, USE src, USE shift);
21263 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21264 ins_encode %{
21265 int opcode = this->ideal_Opcode();
21266 if (UseAVX > 0) {
21267 int vlen_enc = vector_length_encoding(this);
21268 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21269 } else {
21270 int vlen = Matcher::vector_length(this);
21271 if (vlen == 2) {
21272 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21273 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21274 } else if (vlen == 4) {
21275 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21276 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21277 } else {
      assert(vlen == 8, "sanity");
21279 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21280 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21281 }
21282 }
21283 %}
21284 ins_pipe( pipe_slow );
21285 %}
21286
// Integers vector shift
21288 instruct vshiftI(vec dst, vec src, vec shift) %{
21289 predicate(!n->as_ShiftV()->is_var_shift());
21290 match(Set dst ( LShiftVI src shift));
21291 match(Set dst ( RShiftVI src shift));
21292 match(Set dst (URShiftVI src shift));
21293 effect(TEMP dst, USE src, USE shift);
21294 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21295 ins_encode %{
21296 int opcode = this->ideal_Opcode();
21297 if (UseAVX > 0) {
21298 int vlen_enc = vector_length_encoding(this);
21299 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21300 } else {
21301 int vlen = Matcher::vector_length(this);
21302 if (vlen == 2) {
21303 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21304 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21305 } else {
21306 assert(vlen == 4, "sanity");
21307 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21308 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21309 }
21310 }
21311 %}
21312 ins_pipe( pipe_slow );
21313 %}
21314
// Integers vector constant shift
21316 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21317 match(Set dst (LShiftVI src (LShiftCntV shift)));
21318 match(Set dst (RShiftVI src (RShiftCntV shift)));
21319 match(Set dst (URShiftVI src (RShiftCntV shift)));
21320 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21321 ins_encode %{
21322 int opcode = this->ideal_Opcode();
21323 if (UseAVX > 0) {
21324 int vector_len = vector_length_encoding(this);
21325 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21326 } else {
21327 int vlen = Matcher::vector_length(this);
21328 if (vlen == 2) {
21329 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21330 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21331 } else {
21332 assert(vlen == 4, "sanity");
21333 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21334 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21335 }
21336 }
21337 %}
21338 ins_pipe( pipe_slow );
21339 %}
21340
21341 // Longs vector shift
21342 instruct vshiftL(vec dst, vec src, vec shift) %{
21343 predicate(!n->as_ShiftV()->is_var_shift());
21344 match(Set dst ( LShiftVL src shift));
21345 match(Set dst (URShiftVL src shift));
21346 effect(TEMP dst, USE src, USE shift);
21347 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21348 ins_encode %{
21349 int opcode = this->ideal_Opcode();
21350 if (UseAVX > 0) {
21351 int vlen_enc = vector_length_encoding(this);
21352 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21353 } else {
21354 assert(Matcher::vector_length(this) == 2, "");
21355 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21356 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21357 }
21358 %}
21359 ins_pipe( pipe_slow );
21360 %}
21361
21362 // Longs vector constant shift
21363 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21364 match(Set dst (LShiftVL src (LShiftCntV shift)));
21365 match(Set dst (URShiftVL src (RShiftCntV shift)));
21366 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21367 ins_encode %{
21368 int opcode = this->ideal_Opcode();
21369 if (UseAVX > 0) {
21370 int vector_len = vector_length_encoding(this);
21371 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21372 } else {
21373 assert(Matcher::vector_length(this) == 2, "");
21374 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21375 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21376 }
21377 %}
21378 ins_pipe( pipe_slow );
21379 %}
21380
21381 // -------------------ArithmeticRightShift -----------------------------------
21382 // Long vector arithmetic right shift
21383 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21384 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21385 match(Set dst (RShiftVL src shift));
21386 effect(TEMP dst, TEMP tmp);
21387 format %{ "vshiftq $dst,$src,$shift" %}
21388 ins_encode %{
21389 uint vlen = Matcher::vector_length(this);
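    // SSE/AVX2 have no arithmetic right shift for 64-bit lanes; emulate it as
    // ((x >>> s) ^ m) - m with m = (1 << 63) >>> s, which sign-extends the
    // result for negative inputs.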
21390 if (vlen == 2) {
21391 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21392 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21393 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21394 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21395 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21396 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21397 } else {
21398 assert(vlen == 4, "sanity");
21399 assert(UseAVX > 1, "required");
21400 int vlen_enc = Assembler::AVX_256bit;
21401 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21402 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21403 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21404 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21405 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21406 }
21407 %}
21408 ins_pipe( pipe_slow );
21409 %}
21410
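// AVX-512 provides a native 64-bit arithmetic right shift (evpsraq).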
21411 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21412 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21413 match(Set dst (RShiftVL src shift));
21414 format %{ "vshiftq $dst,$src,$shift" %}
21415 ins_encode %{
21416 int vlen_enc = vector_length_encoding(this);
21417 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21418 %}
21419 ins_pipe( pipe_slow );
21420 %}
21421
21422 // ------------------- Variable Shift -----------------------------
21423 // Byte variable shift
21424 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21425 predicate(Matcher::vector_length(n) <= 8 &&
21426 n->as_ShiftV()->is_var_shift() &&
21427 !VM_Version::supports_avx512bw());
21428 match(Set dst ( LShiftVB src shift));
21429 match(Set dst ( RShiftVB src shift));
21430 match(Set dst (URShiftVB src shift));
21431 effect(TEMP dst, TEMP vtmp);
21432 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21433 ins_encode %{
21434 assert(UseAVX >= 2, "required");
21435
21436 int opcode = this->ideal_Opcode();
21437 int vlen_enc = Assembler::AVX_128bit;
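    // varshiftbw widens each byte and applies its per-element shift count,
    // leaving word-sized results; vpackuswb narrows them back to bytes.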
21438 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21439 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21440 %}
21441 ins_pipe( pipe_slow );
21442 %}
21443
21444 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21445 predicate(Matcher::vector_length(n) == 16 &&
21446 n->as_ShiftV()->is_var_shift() &&
21447 !VM_Version::supports_avx512bw());
21448 match(Set dst ( LShiftVB src shift));
21449 match(Set dst ( RShiftVB src shift));
21450 match(Set dst (URShiftVB src shift));
21451 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21452 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21453 ins_encode %{
21454 assert(UseAVX >= 2, "required");
21455
21456 int opcode = this->ideal_Opcode();
21457 int vlen_enc = Assembler::AVX_128bit;
21458 // Shift lower half and get word result in dst
21459 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21460
21461 // Shift upper half and get word result in vtmp1
21462 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21463 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21464 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21465
21466 // Merge and down convert the two word results to byte in dst
21467 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21468 %}
21469 ins_pipe( pipe_slow );
21470 %}
21471
21472 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21473 predicate(Matcher::vector_length(n) == 32 &&
21474 n->as_ShiftV()->is_var_shift() &&
21475 !VM_Version::supports_avx512bw());
21476 match(Set dst ( LShiftVB src shift));
21477 match(Set dst ( RShiftVB src shift));
21478 match(Set dst (URShiftVB src shift));
21479 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21481 ins_encode %{
21482 assert(UseAVX >= 2, "required");
21483
21484 int opcode = this->ideal_Opcode();
21485 int vlen_enc = Assembler::AVX_128bit;
21486 // Process lower 128 bits and get result in dst
21487 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21488 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21489 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21490 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21491 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21492
21493 // Process higher 128 bits and get result in vtmp3
21494 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21495 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21496 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21497 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21498 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21499 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21500 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21501
21502 // Merge the two results in dst
21503 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21504 %}
21505 ins_pipe( pipe_slow );
21506 %}
21507
21508 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21509 predicate(Matcher::vector_length(n) <= 32 &&
21510 n->as_ShiftV()->is_var_shift() &&
21511 VM_Version::supports_avx512bw());
21512 match(Set dst ( LShiftVB src shift));
21513 match(Set dst ( RShiftVB src shift));
21514 match(Set dst (URShiftVB src shift));
21515 effect(TEMP dst, TEMP vtmp);
21516 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21517 ins_encode %{
21518 assert(UseAVX > 2, "required");
21519
21520 int opcode = this->ideal_Opcode();
21521 int vlen_enc = vector_length_encoding(this);
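    // With AVX512BW, evarshiftb performs the per-element byte shift in a
    // single call, using $vtmp as scratch.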
21522 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21523 %}
21524 ins_pipe( pipe_slow );
21525 %}
21526
21527 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21528 predicate(Matcher::vector_length(n) == 64 &&
21529 n->as_ShiftV()->is_var_shift() &&
21530 VM_Version::supports_avx512bw());
21531 match(Set dst ( LShiftVB src shift));
21532 match(Set dst ( RShiftVB src shift));
21533 match(Set dst (URShiftVB src shift));
21534 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21535 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21536 ins_encode %{
21537 assert(UseAVX > 2, "required");
21538
21539 int opcode = this->ideal_Opcode();
21540 int vlen_enc = Assembler::AVX_256bit;
21541 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21542 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21543 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21544 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21545 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21546 %}
21547 ins_pipe( pipe_slow );
21548 %}
21549
21550 // Short variable shift
21551 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21552 predicate(Matcher::vector_length(n) <= 8 &&
21553 n->as_ShiftV()->is_var_shift() &&
21554 !VM_Version::supports_avx512bw());
21555 match(Set dst ( LShiftVS src shift));
21556 match(Set dst ( RShiftVS src shift));
21557 match(Set dst (URShiftVS src shift));
21558 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21560 ins_encode %{
21561 assert(UseAVX >= 2, "required");
21562
21563 int opcode = this->ideal_Opcode();
21564 bool sign = (opcode != Op_URShiftVS);
21565 int vlen_enc = Assembler::AVX_256bit;
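    // There is no variable word shift below AVX512BW: widen shorts to ints,
    // shift as ints, mask back to 16 bits, then pack back to shorts.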
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21569 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21570 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21571 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21572 %}
21573 ins_pipe( pipe_slow );
21574 %}
21575
21576 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21577 predicate(Matcher::vector_length(n) == 16 &&
21578 n->as_ShiftV()->is_var_shift() &&
21579 !VM_Version::supports_avx512bw());
21580 match(Set dst ( LShiftVS src shift));
21581 match(Set dst ( RShiftVS src shift));
21582 match(Set dst (URShiftVS src shift));
21583 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst,$src,$shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21585 ins_encode %{
21586 assert(UseAVX >= 2, "required");
21587
21588 int opcode = this->ideal_Opcode();
21589 bool sign = (opcode != Op_URShiftVS);
21590 int vlen_enc = Assembler::AVX_256bit;
21591 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21592 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21593 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21594 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21595 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21596
21597 // Shift upper half, with result in dst using vtmp1 as TEMP
21598 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21599 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21600 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21601 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21602 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21603 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21604
21605 // Merge lower and upper half result into dst
21606 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21607 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21608 %}
21609 ins_pipe( pipe_slow );
21610 %}
21611
21612 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21613 predicate(n->as_ShiftV()->is_var_shift() &&
21614 VM_Version::supports_avx512bw());
21615 match(Set dst ( LShiftVS src shift));
21616 match(Set dst ( RShiftVS src shift));
21617 match(Set dst (URShiftVS src shift));
21618 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21619 ins_encode %{
21620 assert(UseAVX > 2, "required");
21621
21622 int opcode = this->ideal_Opcode();
21623 int vlen_enc = vector_length_encoding(this);
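    // Without AVX512VL, only the 512-bit form of the instruction is legal.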
21624 if (!VM_Version::supports_avx512vl()) {
21625 vlen_enc = Assembler::AVX_512bit;
21626 }
21627 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21628 %}
21629 ins_pipe( pipe_slow );
21630 %}
21631
// Integer variable shift
21633 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21634 predicate(n->as_ShiftV()->is_var_shift());
21635 match(Set dst ( LShiftVI src shift));
21636 match(Set dst ( RShiftVI src shift));
21637 match(Set dst (URShiftVI src shift));
21638 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21639 ins_encode %{
21640 assert(UseAVX >= 2, "required");
21641
21642 int opcode = this->ideal_Opcode();
21643 int vlen_enc = vector_length_encoding(this);
21644 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21645 %}
21646 ins_pipe( pipe_slow );
21647 %}
21648
// Long variable shift
21650 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21651 predicate(n->as_ShiftV()->is_var_shift());
21652 match(Set dst ( LShiftVL src shift));
21653 match(Set dst (URShiftVL src shift));
21654 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21655 ins_encode %{
21656 assert(UseAVX >= 2, "required");
21657
21658 int opcode = this->ideal_Opcode();
21659 int vlen_enc = vector_length_encoding(this);
21660 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21661 %}
21662 ins_pipe( pipe_slow );
21663 %}
21664
// Long variable arithmetic right shift
21666 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21667 predicate(Matcher::vector_length(n) <= 4 &&
21668 n->as_ShiftV()->is_var_shift() &&
21669 UseAVX == 2);
21670 match(Set dst (RShiftVL src shift));
21671 effect(TEMP dst, TEMP vtmp);
21672 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21673 ins_encode %{
21674 int opcode = this->ideal_Opcode();
21675 int vlen_enc = vector_length_encoding(this);
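    // AVX2 has no variable arithmetic shift for 64-bit lanes; varshiftq
    // emulates it using $vtmp as scratch.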
21676 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21677 $vtmp$$XMMRegister);
21678 %}
21679 ins_pipe( pipe_slow );
21680 %}
21681
21682 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21683 predicate(n->as_ShiftV()->is_var_shift() &&
21684 UseAVX > 2);
21685 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21687 ins_encode %{
21688 int opcode = this->ideal_Opcode();
21689 int vlen_enc = vector_length_encoding(this);
21690 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21691 %}
21692 ins_pipe( pipe_slow );
21693 %}
21694
21695 // --------------------------------- AND --------------------------------------
21696
21697 instruct vand(vec dst, vec src) %{
21698 predicate(UseAVX == 0);
21699 match(Set dst (AndV dst src));
21700 format %{ "pand $dst,$src\t! and vectors" %}
21701 ins_encode %{
21702 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21703 %}
21704 ins_pipe( pipe_slow );
21705 %}
21706
21707 instruct vand_reg(vec dst, vec src1, vec src2) %{
21708 predicate(UseAVX > 0);
21709 match(Set dst (AndV src1 src2));
21710 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21711 ins_encode %{
21712 int vlen_enc = vector_length_encoding(this);
21713 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21714 %}
21715 ins_pipe( pipe_slow );
21716 %}
21717
21718 instruct vand_mem(vec dst, vec src, memory mem) %{
21719 predicate((UseAVX > 0) &&
21720 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21721 match(Set dst (AndV src (LoadVector mem)));
21722 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21723 ins_encode %{
21724 int vlen_enc = vector_length_encoding(this);
21725 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21726 %}
21727 ins_pipe( pipe_slow );
21728 %}
21729
21730 // --------------------------------- OR ---------------------------------------
21731
21732 instruct vor(vec dst, vec src) %{
21733 predicate(UseAVX == 0);
21734 match(Set dst (OrV dst src));
21735 format %{ "por $dst,$src\t! or vectors" %}
21736 ins_encode %{
21737 __ por($dst$$XMMRegister, $src$$XMMRegister);
21738 %}
21739 ins_pipe( pipe_slow );
21740 %}
21741
21742 instruct vor_reg(vec dst, vec src1, vec src2) %{
21743 predicate(UseAVX > 0);
21744 match(Set dst (OrV src1 src2));
21745 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21746 ins_encode %{
21747 int vlen_enc = vector_length_encoding(this);
21748 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21749 %}
21750 ins_pipe( pipe_slow );
21751 %}
21752
21753 instruct vor_mem(vec dst, vec src, memory mem) %{
21754 predicate((UseAVX > 0) &&
21755 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21756 match(Set dst (OrV src (LoadVector mem)));
21757 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21758 ins_encode %{
21759 int vlen_enc = vector_length_encoding(this);
21760 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21761 %}
21762 ins_pipe( pipe_slow );
21763 %}
21764
21765 // --------------------------------- XOR --------------------------------------
21766
21767 instruct vxor(vec dst, vec src) %{
21768 predicate(UseAVX == 0);
21769 match(Set dst (XorV dst src));
21770 format %{ "pxor $dst,$src\t! xor vectors" %}
21771 ins_encode %{
21772 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21773 %}
21774 ins_pipe( pipe_slow );
21775 %}
21776
21777 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21778 predicate(UseAVX > 0);
21779 match(Set dst (XorV src1 src2));
21780 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21781 ins_encode %{
21782 int vlen_enc = vector_length_encoding(this);
21783 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21784 %}
21785 ins_pipe( pipe_slow );
21786 %}
21787
21788 instruct vxor_mem(vec dst, vec src, memory mem) %{
21789 predicate((UseAVX > 0) &&
21790 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21791 match(Set dst (XorV src (LoadVector mem)));
21792 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21793 ins_encode %{
21794 int vlen_enc = vector_length_encoding(this);
21795 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21796 %}
21797 ins_pipe( pipe_slow );
21798 %}
21799
21800 // --------------------------------- VectorCast --------------------------------------
21801
21802 instruct vcastBtoX(vec dst, vec src) %{
21803 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21804 match(Set dst (VectorCastB2X src));
21805 format %{ "vector_cast_b2x $dst,$src\t!" %}
21806 ins_encode %{
21807 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21808 int vlen_enc = vector_length_encoding(this);
21809 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21810 %}
21811 ins_pipe( pipe_slow );
21812 %}
21813
21814 instruct vcastBtoD(legVec dst, legVec src) %{
21815 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21816 match(Set dst (VectorCastB2X src));
21817 format %{ "vector_cast_b2x $dst,$src\t!" %}
21818 ins_encode %{
21819 int vlen_enc = vector_length_encoding(this);
21820 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21821 %}
21822 ins_pipe( pipe_slow );
21823 %}
21824
21825 instruct castStoX(vec dst, vec src) %{
21826 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21827 Matcher::vector_length(n->in(1)) <= 8 && // src
21828 Matcher::vector_element_basic_type(n) == T_BYTE);
21829 match(Set dst (VectorCastS2X src));
21830 format %{ "vector_cast_s2x $dst,$src" %}
21831 ins_encode %{
21832 assert(UseAVX > 0, "required");
21833
21834 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21835 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21836 %}
21837 ins_pipe( pipe_slow );
21838 %}
21839
21840 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21841 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21842 Matcher::vector_length(n->in(1)) == 16 && // src
21843 Matcher::vector_element_basic_type(n) == T_BYTE);
21844 effect(TEMP dst, TEMP vtmp);
21845 match(Set dst (VectorCastS2X src));
21846 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21847 ins_encode %{
21848 assert(UseAVX > 0, "required");
21849
21850 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21851 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21852 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21853 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21854 %}
21855 ins_pipe( pipe_slow );
21856 %}
21857
21858 instruct vcastStoX_evex(vec dst, vec src) %{
21859 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21860 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21861 match(Set dst (VectorCastS2X src));
21862 format %{ "vector_cast_s2x $dst,$src\t!" %}
21863 ins_encode %{
21864 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21865 int src_vlen_enc = vector_length_encoding(this, $src);
21866 int vlen_enc = vector_length_encoding(this);
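    // The evpmov* down-converts need AVX512VL for sub-512-bit forms; fall
    // back to the 512-bit encoding when VL is absent.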
21867 switch (to_elem_bt) {
21868 case T_BYTE:
21869 if (!VM_Version::supports_avx512vl()) {
21870 vlen_enc = Assembler::AVX_512bit;
21871 }
21872 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21873 break;
21874 case T_INT:
21875 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21876 break;
21877 case T_FLOAT:
21878 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21879 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21880 break;
21881 case T_LONG:
21882 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21883 break;
21884 case T_DOUBLE: {
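        // Widen shorts to ints at half the destination width, then convert
        // each int to a 64-bit double lane.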
21885 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21886 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21887 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21888 break;
21889 }
21890 default:
21891 ShouldNotReachHere();
21892 }
21893 %}
21894 ins_pipe( pipe_slow );
21895 %}
21896
21897 instruct castItoX(vec dst, vec src) %{
21898 predicate(UseAVX <= 2 &&
21899 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21900 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21901 match(Set dst (VectorCastI2X src));
21902 format %{ "vector_cast_i2x $dst,$src" %}
21903 ins_encode %{
21904 assert(UseAVX > 0, "required");
21905
21906 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21907 int vlen_enc = vector_length_encoding(this, $src);
21908
21909 if (to_elem_bt == T_BYTE) {
21910 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21911 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21912 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21913 } else {
21914 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21915 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21916 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21917 }
21918 %}
21919 ins_pipe( pipe_slow );
21920 %}
21921
21922 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21923 predicate(UseAVX <= 2 &&
21924 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21925 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21926 match(Set dst (VectorCastI2X src));
21927 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21928 effect(TEMP dst, TEMP vtmp);
21929 ins_encode %{
21930 assert(UseAVX > 0, "required");
21931
21932 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21933 int vlen_enc = vector_length_encoding(this, $src);
21934
21935 if (to_elem_bt == T_BYTE) {
21936 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21937 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21938 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21939 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21940 } else {
21941 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21942 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21943 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21944 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21945 }
21946 %}
21947 ins_pipe( pipe_slow );
21948 %}
21949
21950 instruct vcastItoX_evex(vec dst, vec src) %{
21951 predicate(UseAVX > 2 ||
21952 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21953 match(Set dst (VectorCastI2X src));
21954 format %{ "vector_cast_i2x $dst,$src\t!" %}
21955 ins_encode %{
21956 assert(UseAVX > 0, "required");
21957
21958 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21959 int src_vlen_enc = vector_length_encoding(this, $src);
21960 int dst_vlen_enc = vector_length_encoding(this);
21961 switch (dst_elem_bt) {
21962 case T_BYTE:
21963 if (!VM_Version::supports_avx512vl()) {
21964 src_vlen_enc = Assembler::AVX_512bit;
21965 }
21966 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21967 break;
21968 case T_SHORT:
21969 if (!VM_Version::supports_avx512vl()) {
21970 src_vlen_enc = Assembler::AVX_512bit;
21971 }
21972 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21973 break;
21974 case T_FLOAT:
21975 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21976 break;
21977 case T_LONG:
21978 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21979 break;
21980 case T_DOUBLE:
21981 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21982 break;
21983 default:
21984 ShouldNotReachHere();
21985 }
21986 %}
21987 ins_pipe( pipe_slow );
21988 %}
21989
21990 instruct vcastLtoBS(vec dst, vec src) %{
21991 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21992 UseAVX <= 2);
21993 match(Set dst (VectorCastL2X src));
21994 format %{ "vector_cast_l2x $dst,$src" %}
21995 ins_encode %{
21996 assert(UseAVX > 0, "required");
21997
21998 int vlen = Matcher::vector_length_in_bytes(this, $src);
21999 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22000 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22001 : ExternalAddress(vector_int_to_short_mask());
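    // Select the even 32-bit lanes (the low half of each long), then narrow
    // the ints to shorts/bytes with a mask and pack.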
22002 if (vlen <= 16) {
22003 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22004 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22005 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22006 } else {
22007 assert(vlen <= 32, "required");
22008 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22009 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22010 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22011 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22012 }
22013 if (to_elem_bt == T_BYTE) {
22014 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22015 }
22016 %}
22017 ins_pipe( pipe_slow );
22018 %}
22019
22020 instruct vcastLtoX_evex(vec dst, vec src) %{
22021 predicate(UseAVX > 2 ||
22022 (Matcher::vector_element_basic_type(n) == T_INT ||
22023 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22024 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22025 match(Set dst (VectorCastL2X src));
22026 format %{ "vector_cast_l2x $dst,$src\t!" %}
22027 ins_encode %{
22028 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22029 int vlen = Matcher::vector_length_in_bytes(this, $src);
22030 int vlen_enc = vector_length_encoding(this, $src);
22031 switch (to_elem_bt) {
22032 case T_BYTE:
22033 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22034 vlen_enc = Assembler::AVX_512bit;
22035 }
22036 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037 break;
22038 case T_SHORT:
22039 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22040 vlen_enc = Assembler::AVX_512bit;
22041 }
22042 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043 break;
22044 case T_INT:
22045 if (vlen == 8) {
22046 if ($dst$$XMMRegister != $src$$XMMRegister) {
22047 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22048 }
22049 } else if (vlen == 16) {
22050 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22051 } else if (vlen == 32) {
22052 if (UseAVX > 2) {
22053 if (!VM_Version::supports_avx512vl()) {
22054 vlen_enc = Assembler::AVX_512bit;
22055 }
22056 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22057 } else {
22058 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22059 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22060 }
22061 } else { // vlen == 64
22062 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22063 }
22064 break;
22065 case T_FLOAT:
22066 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22067 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22068 break;
22069 case T_DOUBLE:
22070 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22071 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22072 break;
22073
22074 default: assert(false, "%s", type2name(to_elem_bt));
22075 }
22076 %}
22077 ins_pipe( pipe_slow );
22078 %}
22079
22080 instruct vcastFtoD_reg(vec dst, vec src) %{
22081 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22082 match(Set dst (VectorCastF2X src));
22083 format %{ "vector_cast_f2d $dst,$src\t!" %}
22084 ins_encode %{
22085 int vlen_enc = vector_length_encoding(this);
22086 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22087 %}
22088 ins_pipe( pipe_slow );
22089 %}
22090
22092 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22093 predicate(!VM_Version::supports_avx10_2() &&
22094 !VM_Version::supports_avx512vl() &&
22095 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22096 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22097 is_integral_type(Matcher::vector_element_basic_type(n)));
22098 match(Set dst (VectorCastF2X src));
22099 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22100 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22101 ins_encode %{
22102 int vlen_enc = vector_length_encoding(this, $src);
22103 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load
    // addresses wider than 32 bits in register-indirect addressing mode, since
    // stub constants live in the code cache and ReservedCodeCacheSize is
    // currently capped at 2G. Targets are free to raise that limit, but a code
    // cache larger than 2G looks unreasonable in practice; on the flip side,
    // the cap saves a temporary register allocation, which in the limiting
    // case can prevent spilling in blocks with high register pressure.
22111 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22112 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22113 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22114 %}
22115 ins_pipe( pipe_slow );
22116 %}
22117
22118 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22119 predicate(!VM_Version::supports_avx10_2() &&
22120 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22121 is_integral_type(Matcher::vector_element_basic_type(n)));
22122 match(Set dst (VectorCastF2X src));
22123 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22124 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22125 ins_encode %{
22126 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22127 if (to_elem_bt == T_LONG) {
22128 int vlen_enc = vector_length_encoding(this);
22129 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22130 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22131 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22132 } else {
22133 int vlen_enc = vector_length_encoding(this, $src);
22134 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22135 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22136 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22137 }
22138 %}
22139 ins_pipe( pipe_slow );
22140 %}
22141
22142 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22143 predicate(VM_Version::supports_avx10_2() &&
22144 is_integral_type(Matcher::vector_element_basic_type(n)));
22145 match(Set dst (VectorCastF2X src));
22146 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22147 ins_encode %{
22148 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22149 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22150 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22151 %}
22152 ins_pipe( pipe_slow );
22153 %}
22154
22155 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22156 predicate(VM_Version::supports_avx10_2() &&
22157 is_integral_type(Matcher::vector_element_basic_type(n)));
22158 match(Set dst (VectorCastF2X (LoadVector src)));
22159 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22160 ins_encode %{
22161 int vlen = Matcher::vector_length(this);
22162 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22163 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22164 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22165 %}
22166 ins_pipe( pipe_slow );
22167 %}
22168
22169 instruct vcastDtoF_reg(vec dst, vec src) %{
22170 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22171 match(Set dst (VectorCastD2X src));
22172 format %{ "vector_cast_d2x $dst,$src\t!" %}
22173 ins_encode %{
22174 int vlen_enc = vector_length_encoding(this, $src);
22175 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22176 %}
22177 ins_pipe( pipe_slow );
22178 %}
22179
22180 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22181 predicate(!VM_Version::supports_avx10_2() &&
22182 !VM_Version::supports_avx512vl() &&
22183 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22184 is_integral_type(Matcher::vector_element_basic_type(n)));
22185 match(Set dst (VectorCastD2X src));
22186 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22187 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22188 ins_encode %{
22189 int vlen_enc = vector_length_encoding(this, $src);
22190 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22191 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22192 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22193 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22194 %}
22195 ins_pipe( pipe_slow );
22196 %}
22197
22198 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22199 predicate(!VM_Version::supports_avx10_2() &&
22200 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22201 is_integral_type(Matcher::vector_element_basic_type(n)));
22202 match(Set dst (VectorCastD2X src));
22203 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22204 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22205 ins_encode %{
22206 int vlen_enc = vector_length_encoding(this, $src);
22207 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22208 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22209 ExternalAddress(vector_float_signflip());
22210 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22211 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22212 %}
22213 ins_pipe( pipe_slow );
22214 %}
22215
22216 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22217 predicate(VM_Version::supports_avx10_2() &&
22218 is_integral_type(Matcher::vector_element_basic_type(n)));
22219 match(Set dst (VectorCastD2X src));
22220 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22221 ins_encode %{
22222 int vlen_enc = vector_length_encoding(this, $src);
22223 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22224 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22225 %}
22226 ins_pipe( pipe_slow );
22227 %}
22228
22229 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22230 predicate(VM_Version::supports_avx10_2() &&
22231 is_integral_type(Matcher::vector_element_basic_type(n)));
22232 match(Set dst (VectorCastD2X (LoadVector src)));
22233 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22234 ins_encode %{
22235 int vlen = Matcher::vector_length(this);
22236 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22237 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22238 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22239 %}
22240 ins_pipe( pipe_slow );
22241 %}
22242
22243 instruct vucast(vec dst, vec src) %{
22244 match(Set dst (VectorUCastB2X src));
22245 match(Set dst (VectorUCastS2X src));
22246 match(Set dst (VectorUCastI2X src));
22247 format %{ "vector_ucast $dst,$src\t!" %}
22248 ins_encode %{
22249 assert(UseAVX > 0, "required");
22250
22251 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22252 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22253 int vlen_enc = vector_length_encoding(this);
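    // Zero-extending (unsigned) widening cast; source and destination element
    // types are taken from the node's input and output vector types.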
22254 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22255 %}
22256 ins_pipe( pipe_slow );
22257 %}
22258
22259 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22260 predicate(!VM_Version::supports_avx512vl() &&
22261 Matcher::vector_length_in_bytes(n) < 64 &&
22262 Matcher::vector_element_basic_type(n) == T_INT);
22263 match(Set dst (RoundVF src));
22264 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22265 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22266 ins_encode %{
22267 int vlen_enc = vector_length_encoding(this);
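    // Custom MXCSR: 0x3F80 is the default (round-to-nearest, all exceptions
    // masked); 0x3FBF additionally pre-sets the sticky exception-flag bits
    // (selected under EnableX86ECoreOpts as an E-core tuning).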
22268 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22269 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22270 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22271 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22272 %}
22273 ins_pipe( pipe_slow );
22274 %}
22275
22276 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22277 predicate((VM_Version::supports_avx512vl() ||
22278 Matcher::vector_length_in_bytes(n) == 64) &&
22279 Matcher::vector_element_basic_type(n) == T_INT);
22280 match(Set dst (RoundVF src));
22281 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22282 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22283 ins_encode %{
22284 int vlen_enc = vector_length_encoding(this);
22285 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22286 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22287 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22288 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22289 %}
22290 ins_pipe( pipe_slow );
22291 %}
22292
22293 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22294 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22295 match(Set dst (RoundVD src));
22296 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22297 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22298 ins_encode %{
22299 int vlen_enc = vector_length_encoding(this);
22300 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22301 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22302 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22303 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22304 %}
22305 ins_pipe( pipe_slow );
22306 %}
22307
22308 // --------------------------------- VectorMaskCmp --------------------------------------
22309
22310 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22311 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22312 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22313 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22314 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22315 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22316 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22317 ins_encode %{
22318 int vlen_enc = vector_length_encoding(this, $src1);
22319 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22320 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22321 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22322 } else {
22323 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22324 }
22325 %}
22326 ins_pipe( pipe_slow );
22327 %}
22328
22329 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22330 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22331 n->bottom_type()->isa_vectmask() == nullptr &&
22332 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22333 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22334 effect(TEMP ktmp);
22335 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22336 ins_encode %{
22337 int vlen_enc = Assembler::AVX_512bit;
22338 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22339 KRegister mask = k0; // The comparison itself is not being masked.
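    // Compare into a mask register, then expand the mask to a boolean vector
    // by loading all-ones under it (non-selected lanes are zeroed).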
22340 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22341 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22342 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22343 } else {
22344 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22345 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22346 }
22347 %}
22348 ins_pipe( pipe_slow );
22349 %}
22350
22351 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22352 predicate(n->bottom_type()->isa_vectmask() &&
22353 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22354 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22355 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22356 ins_encode %{
22357 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22358 int vlen_enc = vector_length_encoding(this, $src1);
22359 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22360 KRegister mask = k0; // The comparison itself is not being masked.
22361 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22362 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22363 } else {
22364 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22365 }
22366 %}
22367 ins_pipe( pipe_slow );
22368 %}
22369
22370 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22371 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22372 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22373 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22374 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22375 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22376 (n->in(2)->get_int() == BoolTest::eq ||
22377 n->in(2)->get_int() == BoolTest::lt ||
22378 n->in(2)->get_int() == BoolTest::gt)); // cond
22379 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22380 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22381 ins_encode %{
22382 int vlen_enc = vector_length_encoding(this, $src1);
22383 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22384 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22385 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22386 %}
22387 ins_pipe( pipe_slow );
22388 %}
22389
22390 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22391 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22392 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22393 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22394 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22395 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22396 (n->in(2)->get_int() == BoolTest::ne ||
22397 n->in(2)->get_int() == BoolTest::le ||
22398 n->in(2)->get_int() == BoolTest::ge)); // cond
22399 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22400 effect(TEMP dst, TEMP xtmp);
22401 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22402 ins_encode %{
22403 int vlen_enc = vector_length_encoding(this, $src1);
22404 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22405 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
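    // ne/le/ge have no direct SSE/AVX encodings; vpcmpCCW synthesizes them
    // from the complementary predicate and an inversion, using $xtmp as
    // scratch.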
22406 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22407 %}
22408 ins_pipe( pipe_slow );
22409 %}
22410
22411 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22412 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22413 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22414 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22415 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22416 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22417 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22418 effect(TEMP dst, TEMP xtmp);
22419 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22420 ins_encode %{
22421 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22422 int vlen_enc = vector_length_encoding(this, $src1);
22423 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22424 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22425
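    // Unsigned compare via signed instructions: flipping the sign bit of both
    // operands preserves unsigned ordering under a signed comparison.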
22426 if (vlen_enc == Assembler::AVX_128bit) {
22427 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22428 } else {
22429 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22430 }
22431 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22432 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22433 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22434 %}
22435 ins_pipe( pipe_slow );
22436 %}
22437
22438 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22439 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22440 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22441 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22442 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22443 effect(TEMP ktmp);
22444 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22445 ins_encode %{
22446 assert(UseAVX > 2, "required");
22447
22448 int vlen_enc = vector_length_encoding(this, $src1);
22449 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22450 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22451 KRegister mask = k0; // The comparison itself is not being masked.
22452 bool merge = false;
22453 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22454
22455 switch (src1_elem_bt) {
22456 case T_INT: {
22457 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22458 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22459 break;
22460 }
22461 case T_LONG: {
22462 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22463 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22464 break;
22465 }
22466 default: assert(false, "%s", type2name(src1_elem_bt));
22467 }
22468 %}
22469 ins_pipe( pipe_slow );
22470 %}
22471
22473 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22474 predicate(n->bottom_type()->isa_vectmask() &&
22475 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22476 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22477 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22478 ins_encode %{
22479 assert(UseAVX > 2, "required");
22480 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22481
22482 int vlen_enc = vector_length_encoding(this, $src1);
22483 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22484 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22485 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22486
22487 // Comparison i
22488 switch (src1_elem_bt) {
22489 case T_BYTE: {
22490 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22491 break;
22492 }
22493 case T_SHORT: {
22494 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22495 break;
22496 }
22497 case T_INT: {
22498 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22499 break;
22500 }
22501 case T_LONG: {
22502 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22503 break;
22504 }
22505 default: assert(false, "%s", type2name(src1_elem_bt));
22506 }
22507 %}
22508 ins_pipe( pipe_slow );
22509 %}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
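
// The wide-vector extracts above and below work in two steps; a sketch with
// assumed values: for an int extract at idx = 9 from a 512-bit (16 x int)
// vector, get_lane() first isolates the 128-bit lane that holds the element
// (lane 9/4 = 2) into vtmp, then get_elem() picks the element out of that
// lane. The helpers are assumed to reduce idx to a within-lane offset
// (9 % 4 = 1) for the second step.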

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
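
// The E-core friendly blend above avoids vpblendvb and computes the select
// with plain boolean ops: dst = (mask & src2) | (~mask & src1). A one-lane
// worked example (assumed 8-bit lanes): mask = 0xFF selects src2, since
// (0xFF & src2) | (0x00 & src1) == src2; mask = 0x00 selects src1. This
// relies on each mask lane being all-ones or all-zeros.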

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
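
// Both absneg flavors above boil down to bitwise ops on the IEEE-754 sign
// bit (the [mask] constants live in constant memory). A float worked
// example: abs clears the sign bit, so 0xBF800000 (-1.0f) AND 0x7FFFFFFF
// gives 0x3F800000 (+1.0f); neg flips it, so +1.0f XOR 0x80000000 gives
// -1.0f. The same idea applies to doubles with the 64-bit masks.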

//------------------------------------- VectorTest --------------------------------------------

instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}
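
// The (1 << masklen) - 1 computation above builds an all-true reference for
// the masklen live lanes. A worked example with an assumed masklen = 4: the
// constant is 0b1111, so after AND-ing away stale upper bits the compare
// sets ZF exactly when all four mask bits are set. The anytrue variant below
// only needs the AND, since any surviving bit already clears ZF.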

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
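
// The SSE path above narrows int mask lanes to bytes with a pack chain; a
// sketch with assumed lane values {-1, 0, -1, 0}: pabsd turns -1 into 1,
// then packusdw and packuswb narrow 32 -> 16 -> 8 bits with zeros from xtmp
// filling the upper halves, leaving the byte mask {1, 0, 1, 0} in the low
// 32 bits of dst.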

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
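
// PopulateIndex above composes two primitives: broadcast the scalar start
// value and add the iota constant {0, 1, 2, ...}. A worked example with an
// assumed src1 = 5 and a 4-lane int vector: the broadcast gives {5,5,5,5},
// the iota load gives {0,1,2,3}, and vpadd produces the index sequence
// {5,6,7,8}. The stride (src2) is matched as the constant 1, which the
// assert enforces.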

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries coming from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
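
// AVX2 has no single cross-lane byte shuffle, so the 256-bit rearrange above
// builds one from per-lane pieces. A sketch of the data flow: vperm2i128
// swaps the two 128-bit halves, the two vpshufb passes produce, for every
// destination byte, the candidate from the other half and from its own half,
// and the vpaddb on the shuffle indices is intended to leave byte bit 7 set
// exactly for indices that point across halves, which vpblendvb then uses to
// pick the right candidate (the exact constant lives in
// vector_byte_shufflemask()).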

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle entry by two to get a byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create two copies of the byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get the alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle entry by two to get a byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create two copies of the byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get the alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
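
// A worked example of the short to byte index expansion above (values are
// assumptions for illustration): a shuffle entry of 3 (select short 3)
// becomes 3*2 = 6 after the shift, is duplicated into both byte positions
// of the word as 0x0606, and the final add of the alternating {...,1,0}
// constant turns that into the bytes {6,7}: exactly the two byte addresses
// of short element 3 in little-endian order.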

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries coming from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.

    // Duplicate and multiply each shuffle entry by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create four copies of the byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get the alternate byte indices
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
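
// Same idea as the short case, scaled up; a worked example for the int
// expansion above (assumed shuffle entry 2): pshuflw/pshufhw replicate the
// index into both 16-bit halves of its dword, psllw by 2 scales it to the
// byte offset 8, and the duplication plus the per-dword {3,2,1,0} add turns
// it into the four byte addresses {8,9,10,11} of int element 2.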

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.

    // Multiply each shuffle entry by two to get a double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle entry
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get the alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
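
// Worked example for the long to double-word expansion above (assumed
// shuffle entry 3): vpsllq by 1 scales it to the dword index 6, the
// shift/or pair duplicates it into both dword halves of the qword, and
// adding the alternating {...,1,0} dword constant yields {6,7}: the two
// dword addresses of long element 3, ready for vpermd in the rearrange
// below.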

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
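
// The 8-bit immediate in vpternlogd above is the truth table of the desired
// three-input boolean function: bit i of func is the output for input
// combination i of (dst, src2, src3). Two illustrative encodings (standard
// truth-table arithmetic, not constants taken from this file): 0x96 computes
// dst ^ src2 ^ src3, and 0xE8 the majority function, i.e. the carry of a
// three-way add.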

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
23779
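// Masked vector compare, sketched:
//   ktmp2 = ~mask                     inactive lanes
//   ktmp1 = mask & (src1 == src2)     active lanes that compared equal
//   kortest sets the carry flag when ktmp1 | ktmp2 is all ones, i.e. every
//   active lane matched, so $dst keeps the -1 sentinel.
// Otherwise $dst = tzcnt(~ktmp1), the first clear bit. Assuming the mask is
// a prefix mask (as produced by VectorMaskGen), that is the index of the
// first mismatching element.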
23780 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23781 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23782 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23783 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23784 ins_encode %{
23785 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23786 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23787
23788 Label DONE;
23789 int vlen_enc = vector_length_encoding(this, $src1);
23790 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23791
23792 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23793 __ mov64($dst$$Register, -1L);
23794 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23795 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23796 __ jccb(Assembler::carrySet, DONE);
23797 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23798 __ notq($dst$$Register);
23799 __ tzcntq($dst$$Register, $dst$$Register);
23800 __ bind(DONE);
23801 %}
23802 ins_pipe( pipe_slow );
23803 %}
23804
23805
23806 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23807 match(Set dst (VectorMaskGen len));
23808 effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23810 ins_encode %{
23811 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23812 %}
23813 ins_pipe( pipe_slow );
23814 %}
23815
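// Immediate variant: the mask constant is materialized directly. For
// example, len = 3 yields 0xFFFFFFFFFFFFFFFF >> 61 = 0b111, i.e. the low
// three mask bits set.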
23816 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23817 match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23819 effect(TEMP temp);
23820 ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23822 __ kmovql($dst$$KRegister, $temp$$Register);
23823 %}
23824 ins_pipe( pipe_slow );
23825 %}
23826
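// VectorMaskToLong has three shapes: an EVEX form whose input is already a
// kReg opmask, a boolean-vector form where every lane holds 0 or 1, and an
// AVX form that matches through the VectorStoreMask feeding it. All of them
// funnel into the vector_mask_operation helper.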
23827 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23828 predicate(n->in(1)->bottom_type()->isa_vectmask());
23829 match(Set dst (VectorMaskToLong mask));
23830 effect(TEMP dst, KILL cr);
23831 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23832 ins_encode %{
23833 int opcode = this->ideal_Opcode();
23834 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23835 int mask_len = Matcher::vector_length(this, $mask);
23836 int mask_size = mask_len * type2aelembytes(mbt);
23837 int vlen_enc = vector_length_encoding(this, $mask);
23838 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23839 $dst$$Register, mask_len, mask_size, vlen_enc);
23840 %}
23841 ins_pipe( pipe_slow );
23842 %}
23843
23844 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23845 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23846 match(Set dst (VectorMaskToLong mask));
23847 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23848 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23849 ins_encode %{
23850 int opcode = this->ideal_Opcode();
23851 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23852 int mask_len = Matcher::vector_length(this, $mask);
23853 int vlen_enc = vector_length_encoding(this, $mask);
23854 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23855 $dst$$Register, mask_len, mbt, vlen_enc);
23856 %}
23857 ins_pipe( pipe_slow );
23858 %}
23859
23860 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23861 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23862 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23863 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23864 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23865 ins_encode %{
23866 int opcode = this->ideal_Opcode();
23867 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23868 int mask_len = Matcher::vector_length(this, $mask);
23869 int vlen_enc = vector_length_encoding(this, $mask);
23870 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23871 $dst$$Register, mask_len, mbt, vlen_enc);
23872 %}
23873 ins_pipe( pipe_slow );
23874 %}
23875
23876 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23877 predicate(n->in(1)->bottom_type()->isa_vectmask());
23878 match(Set dst (VectorMaskTrueCount mask));
23879 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23880 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23881 ins_encode %{
23882 int opcode = this->ideal_Opcode();
23883 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23884 int mask_len = Matcher::vector_length(this, $mask);
23885 int mask_size = mask_len * type2aelembytes(mbt);
23886 int vlen_enc = vector_length_encoding(this, $mask);
23887 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23888 $tmp$$Register, mask_len, mask_size, vlen_enc);
23889 %}
23890 ins_pipe( pipe_slow );
23891 %}
23892
23893 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23894 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23895 match(Set dst (VectorMaskTrueCount mask));
23896 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23897 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23898 ins_encode %{
23899 int opcode = this->ideal_Opcode();
23900 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23901 int mask_len = Matcher::vector_length(this, $mask);
23902 int vlen_enc = vector_length_encoding(this, $mask);
23903 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23904 $tmp$$Register, mask_len, mbt, vlen_enc);
23905 %}
23906 ins_pipe( pipe_slow );
23907 %}
23908
23909 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23910 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23911 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23912 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23913 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23914 ins_encode %{
23915 int opcode = this->ideal_Opcode();
23916 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23917 int mask_len = Matcher::vector_length(this, $mask);
23918 int vlen_enc = vector_length_encoding(this, $mask);
23919 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23920 $tmp$$Register, mask_len, mbt, vlen_enc);
23921 %}
23922 ins_pipe( pipe_slow );
23923 %}
23924
23925 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23926 predicate(n->in(1)->bottom_type()->isa_vectmask());
23927 match(Set dst (VectorMaskFirstTrue mask));
23928 match(Set dst (VectorMaskLastTrue mask));
23929 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23930 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23931 ins_encode %{
23932 int opcode = this->ideal_Opcode();
23933 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23934 int mask_len = Matcher::vector_length(this, $mask);
23935 int mask_size = mask_len * type2aelembytes(mbt);
23936 int vlen_enc = vector_length_encoding(this, $mask);
23937 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23938 $tmp$$Register, mask_len, mask_size, vlen_enc);
23939 %}
23940 ins_pipe( pipe_slow );
23941 %}
23942
23943 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23944 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23945 match(Set dst (VectorMaskFirstTrue mask));
23946 match(Set dst (VectorMaskLastTrue mask));
23947 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23948 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23949 ins_encode %{
23950 int opcode = this->ideal_Opcode();
23951 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23952 int mask_len = Matcher::vector_length(this, $mask);
23953 int vlen_enc = vector_length_encoding(this, $mask);
23954 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23955 $tmp$$Register, mask_len, mbt, vlen_enc);
23956 %}
23957 ins_pipe( pipe_slow );
23958 %}
23959
23960 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23961 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23962 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23963 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23964 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23965 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23966 ins_encode %{
23967 int opcode = this->ideal_Opcode();
23968 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23969 int mask_len = Matcher::vector_length(this, $mask);
23970 int vlen_enc = vector_length_encoding(this, $mask);
23971 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23972 $tmp$$Register, mask_len, mbt, vlen_enc);
23973 %}
23974 ins_pipe( pipe_slow );
23975 %}
23976
23977 // --------------------------------- Compress/Expand Operations ---------------------------
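// CompressV packs the active lanes of src (mask bit set) into the low lanes
// of dst; ExpandV performs the inverse scatter. Illustration with four int
// lanes and mask = 0b0101 (lanes 0 and 2 active):
//   compress: [a, b, c, d] -> [a, c, 0, 0]
//   expand:   [a, b, c, d] -> [a, 0, b, 0]
// With AVX512VL (or a full 64-byte vector) this maps onto vpcompress/
// vpexpand; the AVX2 fallback below emulates the permutation with the
// listed temporaries.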
23978 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23979 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23980 match(Set dst (CompressV src mask));
23981 match(Set dst (ExpandV src mask));
23982 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23984 ins_encode %{
23985 int opcode = this->ideal_Opcode();
23986 int vlen_enc = vector_length_encoding(this);
23987 BasicType bt = Matcher::vector_element_basic_type(this);
23988 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23989 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23990 %}
23991 ins_pipe( pipe_slow );
23992 %}
23993
23994 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23995 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23996 match(Set dst (CompressV src mask));
23997 match(Set dst (ExpandV src mask));
23998 format %{ "vector_compress_expand $dst, $src, $mask" %}
23999 ins_encode %{
24000 int opcode = this->ideal_Opcode();
24001 int vector_len = vector_length_encoding(this);
24002 BasicType bt = Matcher::vector_element_basic_type(this);
24003 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24004 %}
24005 ins_pipe( pipe_slow );
24006 %}
24007
24008 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24009 match(Set dst (CompressM mask));
24010 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24011 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24012 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input must be a vector mask");
24014 int mask_len = Matcher::vector_length(this);
24015 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24016 %}
24017 ins_pipe( pipe_slow );
24018 %}
24019
24020 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24021
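// With GFNI, reversing the bits inside each byte is a single vgf2p8affineqb
// against the matrix constant 0x8040201008040201 (the bit-reversal affine
// transform); multi-byte elements then only need their bytes swapped.
// Without GFNI the bit reversal falls back to a shift/mask/lookup sequence
// using the temporaries declared below.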
24022 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24023 predicate(!VM_Version::supports_gfni());
24024 match(Set dst (ReverseV src));
24025 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24027 ins_encode %{
24028 int vec_enc = vector_length_encoding(this);
24029 BasicType bt = Matcher::vector_element_basic_type(this);
24030 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24031 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24032 %}
24033 ins_pipe( pipe_slow );
24034 %}
24035
24036 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24037 predicate(VM_Version::supports_gfni());
24038 match(Set dst (ReverseV src));
24039 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24041 ins_encode %{
24042 int vec_enc = vector_length_encoding(this);
24043 BasicType bt = Matcher::vector_element_basic_type(this);
24044 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24045 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24046 $xtmp$$XMMRegister);
24047 %}
24048 ins_pipe( pipe_slow );
24049 %}
24050
24051 instruct vreverse_byte_reg(vec dst, vec src) %{
24052 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24053 match(Set dst (ReverseBytesV src));
24054 effect(TEMP dst);
24055 format %{ "vector_reverse_byte $dst, $src" %}
24056 ins_encode %{
24057 int vec_enc = vector_length_encoding(this);
24058 BasicType bt = Matcher::vector_element_basic_type(this);
24059 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24060 %}
24061 ins_pipe( pipe_slow );
24062 %}
24063
24064 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24065 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24066 match(Set dst (ReverseBytesV src));
24067 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24069 ins_encode %{
24070 int vec_enc = vector_length_encoding(this);
24071 BasicType bt = Matcher::vector_element_basic_type(this);
24072 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24073 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24074 %}
24075 ins_pipe( pipe_slow );
24076 %}
24077
24078 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24079
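// AVX512CD provides vplzcnt for int/long lanes, which the first two rules
// use directly. Subword lanes have no native lzcnt, so the short and byte
// rules emulate it with the listed temporaries, and pre-AVX512VL shapes
// fall back to the AVX helper at the end of this section.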
24080 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24081 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24082 Matcher::vector_length_in_bytes(n->in(1))));
24083 match(Set dst (CountLeadingZerosV src));
24084 format %{ "vector_count_leading_zeros $dst, $src" %}
24085 ins_encode %{
24086 int vlen_enc = vector_length_encoding(this, $src);
24087 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24088 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24089 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24090 %}
24091 ins_pipe( pipe_slow );
24092 %}
24093
24094 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24095 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24096 Matcher::vector_length_in_bytes(n->in(1))));
24097 match(Set dst (CountLeadingZerosV src mask));
24098 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24099 ins_encode %{
24100 int vlen_enc = vector_length_encoding(this, $src);
24101 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24102 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24103 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24104 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24105 %}
24106 ins_pipe( pipe_slow );
24107 %}
24108
24109 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24110 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24111 VM_Version::supports_avx512cd() &&
24112 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24113 match(Set dst (CountLeadingZerosV src));
24114 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24116 ins_encode %{
24117 int vlen_enc = vector_length_encoding(this, $src);
24118 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24119 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24120 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24121 %}
24122 ins_pipe( pipe_slow );
24123 %}
24124
24125 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24126 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24127 match(Set dst (CountLeadingZerosV src));
24128 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24130 ins_encode %{
24131 int vlen_enc = vector_length_encoding(this, $src);
24132 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24133 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24134 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24135 $rtmp$$Register, true, vlen_enc);
24136 %}
24137 ins_pipe( pipe_slow );
24138 %}
24139
24140 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24141 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24142 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24143 match(Set dst (CountLeadingZerosV src));
24144 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24145 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24146 ins_encode %{
24147 int vlen_enc = vector_length_encoding(this, $src);
24148 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24149 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24150 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24151 %}
24152 ins_pipe( pipe_slow );
24153 %}
24154
24155 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24156 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24157 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24158 match(Set dst (CountLeadingZerosV src));
24159 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24161 ins_encode %{
24162 int vlen_enc = vector_length_encoding(this, $src);
24163 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24164 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24165 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24166 %}
24167 ins_pipe( pipe_slow );
24168 %}
24169
24170 // ---------------------------------- Vector Masked Operations ------------------------------------
24171
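// The masked arithmetic rules below all lower to the evmasked_op helper,
// which emits the EVEX-encoded instruction with $mask as the opmask. The
// 'true' argument requests merge masking: destination lanes whose mask bit
// is clear keep their previous contents instead of being zeroed (the
// VectorRearrange rule further down passes 'false').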
24172 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24173 match(Set dst (AddVB (Binary dst src2) mask));
24174 match(Set dst (AddVS (Binary dst src2) mask));
24175 match(Set dst (AddVI (Binary dst src2) mask));
24176 match(Set dst (AddVL (Binary dst src2) mask));
24177 match(Set dst (AddVF (Binary dst src2) mask));
24178 match(Set dst (AddVD (Binary dst src2) mask));
24179 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24180 ins_encode %{
24181 int vlen_enc = vector_length_encoding(this);
24182 BasicType bt = Matcher::vector_element_basic_type(this);
24183 int opc = this->ideal_Opcode();
24184 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24185 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24186 %}
24187 ins_pipe( pipe_slow );
24188 %}
24189
24190 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24191 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24192 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24193 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24194 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24195 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24196 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24197 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24198 ins_encode %{
24199 int vlen_enc = vector_length_encoding(this);
24200 BasicType bt = Matcher::vector_element_basic_type(this);
24201 int opc = this->ideal_Opcode();
24202 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24203 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24204 %}
24205 ins_pipe( pipe_slow );
24206 %}
24207
24208 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24209 match(Set dst (XorV (Binary dst src2) mask));
24210 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24211 ins_encode %{
24212 int vlen_enc = vector_length_encoding(this);
24213 BasicType bt = Matcher::vector_element_basic_type(this);
24214 int opc = this->ideal_Opcode();
24215 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24216 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24217 %}
24218 ins_pipe( pipe_slow );
24219 %}
24220
24221 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24222 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24223 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24224 ins_encode %{
24225 int vlen_enc = vector_length_encoding(this);
24226 BasicType bt = Matcher::vector_element_basic_type(this);
24227 int opc = this->ideal_Opcode();
24228 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24229 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24230 %}
24231 ins_pipe( pipe_slow );
24232 %}
24233
24234 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24235 match(Set dst (OrV (Binary dst src2) mask));
24236 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24237 ins_encode %{
24238 int vlen_enc = vector_length_encoding(this);
24239 BasicType bt = Matcher::vector_element_basic_type(this);
24240 int opc = this->ideal_Opcode();
24241 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24242 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24243 %}
24244 ins_pipe( pipe_slow );
24245 %}
24246
24247 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24248 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24249 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24250 ins_encode %{
24251 int vlen_enc = vector_length_encoding(this);
24252 BasicType bt = Matcher::vector_element_basic_type(this);
24253 int opc = this->ideal_Opcode();
24254 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24255 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24256 %}
24257 ins_pipe( pipe_slow );
24258 %}
24259
24260 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24261 match(Set dst (AndV (Binary dst src2) mask));
24262 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24263 ins_encode %{
24264 int vlen_enc = vector_length_encoding(this);
24265 BasicType bt = Matcher::vector_element_basic_type(this);
24266 int opc = this->ideal_Opcode();
24267 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24268 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24269 %}
24270 ins_pipe( pipe_slow );
24271 %}
24272
24273 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24274 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24275 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24276 ins_encode %{
24277 int vlen_enc = vector_length_encoding(this);
24278 BasicType bt = Matcher::vector_element_basic_type(this);
24279 int opc = this->ideal_Opcode();
24280 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24281 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24282 %}
24283 ins_pipe( pipe_slow );
24284 %}
24285
24286 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24287 match(Set dst (SubVB (Binary dst src2) mask));
24288 match(Set dst (SubVS (Binary dst src2) mask));
24289 match(Set dst (SubVI (Binary dst src2) mask));
24290 match(Set dst (SubVL (Binary dst src2) mask));
24291 match(Set dst (SubVF (Binary dst src2) mask));
24292 match(Set dst (SubVD (Binary dst src2) mask));
24293 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24294 ins_encode %{
24295 int vlen_enc = vector_length_encoding(this);
24296 BasicType bt = Matcher::vector_element_basic_type(this);
24297 int opc = this->ideal_Opcode();
24298 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24299 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24300 %}
24301 ins_pipe( pipe_slow );
24302 %}
24303
24304 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24305 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24306 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24307 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24308 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24309 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24310 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24311 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24312 ins_encode %{
24313 int vlen_enc = vector_length_encoding(this);
24314 BasicType bt = Matcher::vector_element_basic_type(this);
24315 int opc = this->ideal_Opcode();
24316 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24317 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24318 %}
24319 ins_pipe( pipe_slow );
24320 %}
24321
24322 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24323 match(Set dst (MulVS (Binary dst src2) mask));
24324 match(Set dst (MulVI (Binary dst src2) mask));
24325 match(Set dst (MulVL (Binary dst src2) mask));
24326 match(Set dst (MulVF (Binary dst src2) mask));
24327 match(Set dst (MulVD (Binary dst src2) mask));
24328 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24329 ins_encode %{
24330 int vlen_enc = vector_length_encoding(this);
24331 BasicType bt = Matcher::vector_element_basic_type(this);
24332 int opc = this->ideal_Opcode();
24333 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24334 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24335 %}
24336 ins_pipe( pipe_slow );
24337 %}
24338
24339 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24340 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24341 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24342 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24343 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24344 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24345 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24346 ins_encode %{
24347 int vlen_enc = vector_length_encoding(this);
24348 BasicType bt = Matcher::vector_element_basic_type(this);
24349 int opc = this->ideal_Opcode();
24350 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24351 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24352 %}
24353 ins_pipe( pipe_slow );
24354 %}
24355
24356 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24357 match(Set dst (SqrtVF dst mask));
24358 match(Set dst (SqrtVD dst mask));
24359 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24360 ins_encode %{
24361 int vlen_enc = vector_length_encoding(this);
24362 BasicType bt = Matcher::vector_element_basic_type(this);
24363 int opc = this->ideal_Opcode();
24364 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24366 %}
24367 ins_pipe( pipe_slow );
24368 %}
24369
24370 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24371 match(Set dst (DivVF (Binary dst src2) mask));
24372 match(Set dst (DivVD (Binary dst src2) mask));
24373 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24374 ins_encode %{
24375 int vlen_enc = vector_length_encoding(this);
24376 BasicType bt = Matcher::vector_element_basic_type(this);
24377 int opc = this->ideal_Opcode();
24378 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24379 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24380 %}
24381 ins_pipe( pipe_slow );
24382 %}
24383
24384 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24385 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24386 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24387 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24388 ins_encode %{
24389 int vlen_enc = vector_length_encoding(this);
24390 BasicType bt = Matcher::vector_element_basic_type(this);
24391 int opc = this->ideal_Opcode();
24392 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24393 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24394 %}
24395 ins_pipe( pipe_slow );
24396 %}
24397
24398
24399 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24400 match(Set dst (RotateLeftV (Binary dst shift) mask));
24401 match(Set dst (RotateRightV (Binary dst shift) mask));
24402 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24403 ins_encode %{
24404 int vlen_enc = vector_length_encoding(this);
24405 BasicType bt = Matcher::vector_element_basic_type(this);
24406 int opc = this->ideal_Opcode();
24407 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24408 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24409 %}
24410 ins_pipe( pipe_slow );
24411 %}
24412
24413 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24414 match(Set dst (RotateLeftV (Binary dst src2) mask));
24415 match(Set dst (RotateRightV (Binary dst src2) mask));
24416 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24417 ins_encode %{
24418 int vlen_enc = vector_length_encoding(this);
24419 BasicType bt = Matcher::vector_element_basic_type(this);
24420 int opc = this->ideal_Opcode();
24421 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24422 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24423 %}
24424 ins_pipe( pipe_slow );
24425 %}
24426
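// Masked shifts. The *_imm rules encode the count as an immediate. The
// register forms are split on is_var_shift(): a uniform count (every lane
// shifted by the same amount) uses the vpsll/vpsrl family, while a per-lane
// variable count uses the vpsllv/vpsrlv family; the trailing boolean passed
// to evmasked_op selects between them.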
24427 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24428 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24429 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24430 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24431 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24432 ins_encode %{
24433 int vlen_enc = vector_length_encoding(this);
24434 BasicType bt = Matcher::vector_element_basic_type(this);
24435 int opc = this->ideal_Opcode();
24436 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24437 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24438 %}
24439 ins_pipe( pipe_slow );
24440 %}
24441
24442 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24443 predicate(!n->as_ShiftV()->is_var_shift());
24444 match(Set dst (LShiftVS (Binary dst src2) mask));
24445 match(Set dst (LShiftVI (Binary dst src2) mask));
24446 match(Set dst (LShiftVL (Binary dst src2) mask));
24447 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24448 ins_encode %{
24449 int vlen_enc = vector_length_encoding(this);
24450 BasicType bt = Matcher::vector_element_basic_type(this);
24451 int opc = this->ideal_Opcode();
24452 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24453 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24454 %}
24455 ins_pipe( pipe_slow );
24456 %}
24457
24458 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24459 predicate(n->as_ShiftV()->is_var_shift());
24460 match(Set dst (LShiftVS (Binary dst src2) mask));
24461 match(Set dst (LShiftVI (Binary dst src2) mask));
24462 match(Set dst (LShiftVL (Binary dst src2) mask));
24463 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24464 ins_encode %{
24465 int vlen_enc = vector_length_encoding(this);
24466 BasicType bt = Matcher::vector_element_basic_type(this);
24467 int opc = this->ideal_Opcode();
24468 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24469 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24470 %}
24471 ins_pipe( pipe_slow );
24472 %}
24473
24474 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24475 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24476 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24477 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24478 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24479 ins_encode %{
24480 int vlen_enc = vector_length_encoding(this);
24481 BasicType bt = Matcher::vector_element_basic_type(this);
24482 int opc = this->ideal_Opcode();
24483 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24484 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24485 %}
24486 ins_pipe( pipe_slow );
24487 %}
24488
24489 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24490 predicate(!n->as_ShiftV()->is_var_shift());
24491 match(Set dst (RShiftVS (Binary dst src2) mask));
24492 match(Set dst (RShiftVI (Binary dst src2) mask));
24493 match(Set dst (RShiftVL (Binary dst src2) mask));
24494 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24495 ins_encode %{
24496 int vlen_enc = vector_length_encoding(this);
24497 BasicType bt = Matcher::vector_element_basic_type(this);
24498 int opc = this->ideal_Opcode();
24499 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24500 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24501 %}
24502 ins_pipe( pipe_slow );
24503 %}
24504
24505 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24506 predicate(n->as_ShiftV()->is_var_shift());
24507 match(Set dst (RShiftVS (Binary dst src2) mask));
24508 match(Set dst (RShiftVI (Binary dst src2) mask));
24509 match(Set dst (RShiftVL (Binary dst src2) mask));
24510 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24511 ins_encode %{
24512 int vlen_enc = vector_length_encoding(this);
24513 BasicType bt = Matcher::vector_element_basic_type(this);
24514 int opc = this->ideal_Opcode();
24515 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24516 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24517 %}
24518 ins_pipe( pipe_slow );
24519 %}
24520
24521 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24522 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24523 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24524 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24525 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24526 ins_encode %{
24527 int vlen_enc = vector_length_encoding(this);
24528 BasicType bt = Matcher::vector_element_basic_type(this);
24529 int opc = this->ideal_Opcode();
24530 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24531 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24532 %}
24533 ins_pipe( pipe_slow );
24534 %}
24535
24536 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24537 predicate(!n->as_ShiftV()->is_var_shift());
24538 match(Set dst (URShiftVS (Binary dst src2) mask));
24539 match(Set dst (URShiftVI (Binary dst src2) mask));
24540 match(Set dst (URShiftVL (Binary dst src2) mask));
24541 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24542 ins_encode %{
24543 int vlen_enc = vector_length_encoding(this);
24544 BasicType bt = Matcher::vector_element_basic_type(this);
24545 int opc = this->ideal_Opcode();
24546 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24547 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24548 %}
24549 ins_pipe( pipe_slow );
24550 %}
24551
24552 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24553 predicate(n->as_ShiftV()->is_var_shift());
24554 match(Set dst (URShiftVS (Binary dst src2) mask));
24555 match(Set dst (URShiftVI (Binary dst src2) mask));
24556 match(Set dst (URShiftVL (Binary dst src2) mask));
24557 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24558 ins_encode %{
24559 int vlen_enc = vector_length_encoding(this);
24560 BasicType bt = Matcher::vector_element_basic_type(this);
24561 int opc = this->ideal_Opcode();
24562 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24563 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24564 %}
24565 ins_pipe( pipe_slow );
24566 %}
24567
24568 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24569 match(Set dst (MaxV (Binary dst src2) mask));
24570 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24571 ins_encode %{
24572 int vlen_enc = vector_length_encoding(this);
24573 BasicType bt = Matcher::vector_element_basic_type(this);
24574 int opc = this->ideal_Opcode();
24575 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24576 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24577 %}
24578 ins_pipe( pipe_slow );
24579 %}
24580
24581 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24582 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24583 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24584 ins_encode %{
24585 int vlen_enc = vector_length_encoding(this);
24586 BasicType bt = Matcher::vector_element_basic_type(this);
24587 int opc = this->ideal_Opcode();
24588 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24590 %}
24591 ins_pipe( pipe_slow );
24592 %}
24593
24594 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24595 match(Set dst (MinV (Binary dst src2) mask));
24596 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24597 ins_encode %{
24598 int vlen_enc = vector_length_encoding(this);
24599 BasicType bt = Matcher::vector_element_basic_type(this);
24600 int opc = this->ideal_Opcode();
24601 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24603 %}
24604 ins_pipe( pipe_slow );
24605 %}
24606
24607 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24608 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24609 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24610 ins_encode %{
24611 int vlen_enc = vector_length_encoding(this);
24612 BasicType bt = Matcher::vector_element_basic_type(this);
24613 int opc = this->ideal_Opcode();
24614 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24615 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24616 %}
24617 ins_pipe( pipe_slow );
24618 %}
24619
24620 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24621 match(Set dst (VectorRearrange (Binary dst src2) mask));
24622 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24623 ins_encode %{
24624 int vlen_enc = vector_length_encoding(this);
24625 BasicType bt = Matcher::vector_element_basic_type(this);
24626 int opc = this->ideal_Opcode();
24627 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24628 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24629 %}
24630 ins_pipe( pipe_slow );
24631 %}
24632
24633 instruct vabs_masked(vec dst, kReg mask) %{
24634 match(Set dst (AbsVB dst mask));
24635 match(Set dst (AbsVS dst mask));
24636 match(Set dst (AbsVI dst mask));
24637 match(Set dst (AbsVL dst mask));
24638 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24639 ins_encode %{
24640 int vlen_enc = vector_length_encoding(this);
24641 BasicType bt = Matcher::vector_element_basic_type(this);
24642 int opc = this->ideal_Opcode();
24643 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24644 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24645 %}
24646 ins_pipe( pipe_slow );
24647 %}
24648
24649 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24650 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24651 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24652 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24653 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24655 int vlen_enc = vector_length_encoding(this);
24656 BasicType bt = Matcher::vector_element_basic_type(this);
24657 int opc = this->ideal_Opcode();
24658 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24660 %}
24661 ins_pipe( pipe_slow );
24662 %}
24663
24664 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24665 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24666 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24667 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24668 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24670 int vlen_enc = vector_length_encoding(this);
24671 BasicType bt = Matcher::vector_element_basic_type(this);
24672 int opc = this->ideal_Opcode();
24673 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24675 %}
24676 ins_pipe( pipe_slow );
24677 %}
24678
24679 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24680 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24681 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24682 ins_encode %{
24683 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24684 int vlen_enc = vector_length_encoding(this, $src1);
24685 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24686
    // Dispatch the comparison on the element basic type of src1.
24688 switch (src1_elem_bt) {
24689 case T_BYTE: {
24690 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24691 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24692 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24693 break;
24694 }
24695 case T_SHORT: {
24696 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24697 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24698 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24699 break;
24700 }
24701 case T_INT: {
24702 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24703 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24704 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24705 break;
24706 }
24707 case T_LONG: {
24708 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24709 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24710 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24711 break;
24712 }
24713 case T_FLOAT: {
24714 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24715 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24716 break;
24717 }
24718 case T_DOUBLE: {
24719 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24720 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24721 break;
24722 }
24723 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24724 }
24725 %}
24726 ins_pipe( pipe_slow );
24727 %}
24728
24729 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24730 predicate(Matcher::vector_length(n) <= 32);
24731 match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
24733 ins_encode %{
24734 int mask_len = Matcher::vector_length(this);
24735 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24736 %}
24737 ins_pipe( pipe_slow );
24738 %}
24739
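// Mask negation: XorVMask against MaskAll(-1) is a bitwise NOT of the
// predicate, lowered to knot. Mask lengths below 8 additionally need
// AVX512DQ and a fix-up through $ktmp/$rtmp so the bits beyond the mask
// length stay well defined.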
24740 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24741 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24742 match(Set dst (XorVMask src (MaskAll cnt)));
24743 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24745 ins_encode %{
24746 uint masklen = Matcher::vector_length(this);
24747 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24748 %}
24749 ins_pipe( pipe_slow );
24750 %}
24751
24752 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24753 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24754 (Matcher::vector_length(n) == 16) ||
24755 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24756 match(Set dst (XorVMask src (MaskAll cnt)));
24757 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24758 ins_encode %{
24759 uint masklen = Matcher::vector_length(this);
24760 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24761 %}
24762 ins_pipe( pipe_slow );
24763 %}
24764
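// VectorLongToMask: when the result is a true predicate type, the long is
// simply moved into a kReg (see long_to_mask_evex below). Otherwise the bits
// are expanded into a boolean vector; for mask_len <= 8 the expansion needs
// no vector temporary, which is why xnoreg is passed even though xtmp is
// reserved, while longer masks (up to 32 lanes) use $xtmp1.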
24765 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24766 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24767 match(Set dst (VectorLongToMask src));
24768 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24769 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24770 ins_encode %{
24771 int mask_len = Matcher::vector_length(this);
24772 int vec_enc = vector_length_encoding(mask_len);
24773 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24774 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24775 %}
24776 ins_pipe( pipe_slow );
24777 %}
24778
24779
24780 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24781 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24782 match(Set dst (VectorLongToMask src));
24783 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24785 ins_encode %{
24786 int mask_len = Matcher::vector_length(this);
24787 assert(mask_len <= 32, "invalid mask length");
24788 int vec_enc = vector_length_encoding(mask_len);
24789 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24790 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24791 %}
24792 ins_pipe( pipe_slow );
24793 %}
24794
24795 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24796 predicate(n->bottom_type()->isa_vectmask());
24797 match(Set dst (VectorLongToMask src));
24798 format %{ "long_to_mask_evex $dst, $src\t!" %}
24799 ins_encode %{
24800 __ kmov($dst$$KRegister, $src$$Register);
24801 %}
24802 ins_pipe( pipe_slow );
24803 %}
24804
24805 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24806 match(Set dst (AndVMask src1 src2));
24807 match(Set dst (OrVMask src1 src2));
24808 match(Set dst (XorVMask src1 src2));
24809 effect(TEMP kscratch);
24810 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24811 ins_encode %{
24812 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24813 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24814 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24815 uint masklen = Matcher::vector_length(this);
24816 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24817 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24818 %}
24819 ins_pipe( pipe_slow );
24820 %}
24821
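// evpternlog: $func is an 8-bit truth table over the three inputs
// (dst, src2, src3). For example func = 0x96 computes dst ^ src2 ^ src3 and
// func = 0xE8 the bitwise majority; under $mask the result is merged into
// active lanes only.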
24822 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24823 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24824 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24825 ins_encode %{
24826 int vlen_enc = vector_length_encoding(this);
24827 BasicType bt = Matcher::vector_element_basic_type(this);
24828 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24829 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24830 %}
24831 ins_pipe( pipe_slow );
24832 %}
24833
24834 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24835 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24836 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24837 ins_encode %{
24838 int vlen_enc = vector_length_encoding(this);
24839 BasicType bt = Matcher::vector_element_basic_type(this);
24840 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24841 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24842 %}
24843 ins_pipe( pipe_slow );
24844 %}
24845
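// CastVV exists only for the type system: the three variants below (opmask,
// vector, legacy vector) emit no code (size(0), empty encoding) and merely
// keep the operand in the register class the allocator expects.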
24846 instruct castMM(kReg dst)
24847 %{
24848 match(Set dst (CastVV dst));
24849
24850 size(0);
24851 format %{ "# castVV of $dst" %}
24852 ins_encode(/* empty encoding */);
24853 ins_cost(0);
24854 ins_pipe(empty);
24855 %}
24856
24857 instruct castVV(vec dst)
24858 %{
24859 match(Set dst (CastVV dst));
24860
24861 size(0);
24862 format %{ "# castVV of $dst" %}
24863 ins_encode(/* empty encoding */);
24864 ins_cost(0);
24865 ins_pipe(empty);
24866 %}
24867
24868 instruct castVVLeg(legVec dst)
24869 %{
24870 match(Set dst (CastVV dst));
24871
24872 size(0);
24873 format %{ "# castVV of $dst" %}
24874 ins_encode(/* empty encoding */);
24875 ins_cost(0);
24876 ins_pipe(empty);
24877 %}
24878
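// vfpclass with imm8 = 0x18 tests the +infinity (bit 3) and -infinity
// (bit 4) classes, so the opmask bit is set exactly when the input is
// infinite; kmovbl then turns it into the integer result.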
24879 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24880 %{
24881 match(Set dst (IsInfiniteF src));
24882 effect(TEMP ktmp, KILL cr);
24883 format %{ "float_class_check $dst, $src" %}
24884 ins_encode %{
24885 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24886 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24887 %}
24888 ins_pipe(pipe_slow);
24889 %}
24890
24891 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24892 %{
24893 match(Set dst (IsInfiniteD src));
24894 effect(TEMP ktmp, KILL cr);
24895 format %{ "double_class_check $dst, $src" %}
24896 ins_encode %{
24897 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24898 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24899 %}
24900 ins_pipe(pipe_slow);
24901 %}
24902
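// Saturating add/sub. Byte and short lanes map onto the native
// vpadds/vpaddus/vpsubs/vpsubus instructions. Int and long lanes have no
// native saturating forms, so the EVEX and AVX rules below detect overflow
// and clamp to the type bounds using the listed temporaries.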
24903 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24904 %{
24905 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24906 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24907 match(Set dst (SaturatingAddV src1 src2));
24908 match(Set dst (SaturatingSubV src1 src2));
24909 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24910 ins_encode %{
24911 int vlen_enc = vector_length_encoding(this);
24912 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24913 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24914 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24915 %}
24916 ins_pipe(pipe_slow);
24917 %}
24918
24919 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24920 %{
24921 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24922 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24923 match(Set dst (SaturatingAddV src1 src2));
24924 match(Set dst (SaturatingSubV src1 src2));
24925 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24926 ins_encode %{
24927 int vlen_enc = vector_length_encoding(this);
24928 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24929 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24930 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24931 %}
24932 ins_pipe(pipe_slow);
24933 %}
24934
24935 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24936 %{
24937 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24938 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24939 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24940 match(Set dst (SaturatingAddV src1 src2));
24941 match(Set dst (SaturatingSubV src1 src2));
24942 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24943 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24944 ins_encode %{
24945 int vlen_enc = vector_length_encoding(this);
24946 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24947 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24948 $src1$$XMMRegister, $src2$$XMMRegister,
24949 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24950 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24951 %}
24952 ins_pipe(pipe_slow);
24953 %}
24954
24955 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24956 %{
24957 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24958 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24959 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24960 match(Set dst (SaturatingAddV src1 src2));
24961 match(Set dst (SaturatingSubV src1 src2));
24962 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24963 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24964 ins_encode %{
24965 int vlen_enc = vector_length_encoding(this);
24966 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24967 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24968 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24969 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24970 %}
24971 ins_pipe(pipe_slow);
24972 %}
24973
24974 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24975 %{
24976 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24977 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24978 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24979 match(Set dst (SaturatingAddV src1 src2));
24980 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24981 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24982 ins_encode %{
24983 int vlen_enc = vector_length_encoding(this);
24984 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24985 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24986 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24987 %}
24988 ins_pipe(pipe_slow);
24989 %}
24990
24991 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24992 %{
24993 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24994 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24995 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24996 match(Set dst (SaturatingAddV src1 src2));
24997 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24998 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24999 ins_encode %{
25000 int vlen_enc = vector_length_encoding(this);
25001 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25002 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25003 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25004 %}
25005 ins_pipe(pipe_slow);
25006 %}
25007
25008 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25009 %{
25010 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25011 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25012 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25013 match(Set dst (SaturatingSubV src1 src2));
25014 effect(TEMP ktmp);
25015 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25016 ins_encode %{
25017 int vlen_enc = vector_length_encoding(this);
25018 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25019 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25020 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25021 %}
25022 ins_pipe(pipe_slow);
25023 %}
25024
25025 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25026 %{
25027 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25028 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25029 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25030 match(Set dst (SaturatingSubV src1 src2));
25031 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25032 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25033 ins_encode %{
25034 int vlen_enc = vector_length_encoding(this);
25035 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25036 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25037 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25038 %}
25039 ins_pipe(pipe_slow);
25040 %}
25041
25042 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25043 %{
25044 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25045 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25046 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25047 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25048 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25049 ins_encode %{
25050 int vlen_enc = vector_length_encoding(this);
25051 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25052 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25053 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25054 %}
25055 ins_pipe(pipe_slow);
25056 %}
25057
25058 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25059 %{
25060 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25061 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25062 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25063 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25064 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25065 ins_encode %{
25066 int vlen_enc = vector_length_encoding(this);
25067 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25068 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25069 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25070 %}
25071 ins_pipe(pipe_slow);
25072 %}
25073
25074 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25075 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25076 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25077 match(Set dst (SaturatingAddV (Binary dst src) mask));
25078 match(Set dst (SaturatingSubV (Binary dst src) mask));
25079 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25080 ins_encode %{
25081 int vlen_enc = vector_length_encoding(this);
25082 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25083 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25084 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25085 %}
25086 ins_pipe( pipe_slow );
25087 %}
25088
25089 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25090 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25091 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25092 match(Set dst (SaturatingAddV (Binary dst src) mask));
25093 match(Set dst (SaturatingSubV (Binary dst src) mask));
25094 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25095 ins_encode %{
25096 int vlen_enc = vector_length_encoding(this);
25097 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25098 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25099 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25100 %}
25101 ins_pipe( pipe_slow );
25102 %}
25103
25104 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25105 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25106 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25107 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25108 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25109 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25110 ins_encode %{
25111 int vlen_enc = vector_length_encoding(this);
25112 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25113 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25114 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25115 %}
25116 ins_pipe( pipe_slow );
25117 %}
25118
25119 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25120 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25121 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25122 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25123 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25124 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25125 ins_encode %{
25126 int vlen_enc = vector_length_encoding(this);
25127 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25128 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25129 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25130 %}
25131 ins_pipe( pipe_slow );
25132 %}
25133
25134 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25135 %{
25136 match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2" %}
25138 ins_encode %{
25139 int vlen_enc = vector_length_encoding(this);
25140 BasicType bt = Matcher::vector_element_basic_type(this);
25141 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25142 %}
25143 ins_pipe(pipe_slow);
25144 %}
25145
25146 instruct reinterpretS2HF(regF dst, rRegI src)
25147 %{
25148 match(Set dst (ReinterpretS2HF src));
25149 format %{ "vmovw $dst, $src" %}
25150 ins_encode %{
25151 __ vmovw($dst$$XMMRegister, $src$$Register);
25152 %}
25153 ins_pipe(pipe_slow);
25154 %}
25155
25156 instruct reinterpretHF2S(rRegI dst, regF src)
25157 %{
25158 match(Set dst (ReinterpretHF2S src));
25159 format %{ "vmovw $dst, $src" %}
25160 ins_encode %{
25161 __ vmovw($dst$$Register, $src$$XMMRegister);
25162 %}
25163 ins_pipe(pipe_slow);
25164 %}
25165
25166 instruct convF2HFAndS2HF(regF dst, regF src)
25167 %{
25168 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25169 format %{ "convF2HFAndS2HF $dst, $src" %}
25170 ins_encode %{
25171 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25172 %}
25173 ins_pipe(pipe_slow);
25174 %}
25175
25176 instruct convHF2SAndHF2F(regF dst, regF src)
25177 %{
25178 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25179 format %{ "convHF2SAndHF2F $dst, $src" %}
25180 ins_encode %{
25181 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25182 %}
25183 ins_pipe(pipe_slow);
25184 %}
25185
25186 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25187 %{
25188 match(Set dst (SqrtHF src));
25189 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25190 ins_encode %{
25191 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25192 %}
25193 ins_pipe(pipe_slow);
25194 %}
25195
25196 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25197 %{
25198 match(Set dst (AddHF src1 src2));
25199 match(Set dst (DivHF src1 src2));
25200 match(Set dst (MulHF src1 src2));
25201 match(Set dst (SubHF src1 src2));
25202 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25203 ins_encode %{
25204 int opcode = this->ideal_Opcode();
25205 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25206 %}
25207 ins_pipe(pipe_slow);
25208 %}
25209
25210 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25211 %{
25212 predicate(VM_Version::supports_avx10_2());
25213 match(Set dst (MaxHF src1 src2));
25214 match(Set dst (MinHF src1 src2));
25215 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25216 ins_encode %{
25217 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25218 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25219 %}
25220 ins_pipe( pipe_slow );
25221 %}
25222
25223 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25224 %{
25225 predicate(!VM_Version::supports_avx10_2());
25226 match(Set dst (MaxHF src1 src2));
25227 match(Set dst (MinHF src1 src2));
25228 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25229 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25230 ins_encode %{
25231 int opcode = this->ideal_Opcode();
25232 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25233 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25234 %}
25235 ins_pipe( pipe_slow );
25236 %}
25237
25238 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25239 %{
25240 match(Set dst (FmaHF src2 (Binary dst src1)));
25241 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25243 ins_encode %{
25244 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25245 %}
25246 ins_pipe( pipe_slow );
25247 %}
25248
25249
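// Packed float16 (FP16) vector operations. The memory-operand forms match a
// VectorReinterpret of a LoadVector, since Float16 vector values arrive as
// reinterpreted short-vector loads; folding the pattern lets the EVEX
// instruction consume the address directly.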
25250 instruct vector_sqrt_HF_reg(vec dst, vec src)
25251 %{
25252 match(Set dst (SqrtVHF src));
25253 format %{ "vector_sqrt_fp16 $dst, $src" %}
25254 ins_encode %{
25255 int vlen_enc = vector_length_encoding(this);
25256 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25257 %}
25258 ins_pipe(pipe_slow);
25259 %}
25260
25261 instruct vector_sqrt_HF_mem(vec dst, memory src)
25262 %{
25263 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25264 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25265 ins_encode %{
25266 int vlen_enc = vector_length_encoding(this);
25267 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25268 %}
25269 ins_pipe(pipe_slow);
25270 %}
25271
25272 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25273 %{
25274 match(Set dst (AddVHF src1 src2));
25275 match(Set dst (DivVHF src1 src2));
25276 match(Set dst (MulVHF src1 src2));
25277 match(Set dst (SubVHF src1 src2));
25278 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25279 ins_encode %{
25280 int vlen_enc = vector_length_encoding(this);
25281 int opcode = this->ideal_Opcode();
25282 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25283 %}
25284 ins_pipe(pipe_slow);
25285 %}
25286
25287
25288 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25289 %{
25290 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25291 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25292 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25293 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25294 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25295 ins_encode %{
25296 int vlen_enc = vector_length_encoding(this);
25297 int opcode = this->ideal_Opcode();
25298 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25299 %}
25300 ins_pipe(pipe_slow);
25301 %}
25302
25303 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25304 %{
25305 match(Set dst (FmaVHF src2 (Binary dst src1)));
25306 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25307 ins_encode %{
25308 int vlen_enc = vector_length_encoding(this);
25309 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25310 %}
25311 ins_pipe( pipe_slow );
25312 %}
25313
25314 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25315 %{
25316 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25317 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25318 ins_encode %{
25319 int vlen_enc = vector_length_encoding(this);
25320 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25321 %}
25322 ins_pipe( pipe_slow );
25323 %}
25324
25325 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25326 %{
25327 predicate(VM_Version::supports_avx10_2());
25328 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25329 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25330 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25331 ins_encode %{
25332 int vlen_enc = vector_length_encoding(this);
25333 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25334 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25335 %}
25336 ins_pipe( pipe_slow );
25337 %}
25338
25339 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25340 %{
25341 predicate(VM_Version::supports_avx10_2());
25342 match(Set dst (MinVHF src1 src2));
25343 match(Set dst (MaxVHF src1 src2));
25344 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25345 ins_encode %{
25346 int vlen_enc = vector_length_encoding(this);
25347 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25348 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25349 %}
25350 ins_pipe( pipe_slow );
25351 %}
25352
25353 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25354 %{
25355 predicate(!VM_Version::supports_avx10_2());
25356 match(Set dst (MinVHF src1 src2));
25357 match(Set dst (MaxVHF src1 src2));
25358 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25359 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25360 ins_encode %{
25361 int vlen_enc = vector_length_encoding(this);
25362 int opcode = this->ideal_Opcode();
25363 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25364 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25365 %}
25366 ins_pipe( pipe_slow );
25367 %}
25368
25369 //----------PEEPHOLE RULES-----------------------------------------------------
25370 // These must follow all instruction definitions as they use the names
25371 // defined in the instructions definitions.
25372 //
25373 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate evaluates to true
25375 //
25376 // peepmatch ( root_instr_name [preceding_instruction]* );
25377 //
25378 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization. The
// // procedure should reside in the architecture-dependent peephole file and
// // has the signature MachNode* (Block*, int, PhaseRegAlloc*,
// // MachNode* (*)(), int...), where the arguments are the basic block, the
// // index of the current node inside the block, the register allocator,
// // functions that, when invoked, return a new node as defined in
// // peepreplace, and the rule numbers of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true on success and
// // false otherwise.
25387 //
25388 // peepconstraint %{
25389 // (instruction_number.operand_name relational_op instruction_number.operand_name
25390 // [, ...] );
// // instruction numbers are zero-based, in left-to-right order of the peepmatch
25392 //
25393 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25394 // // provide an instruction_number.operand_name for each operand that appears
25395 // // in the replacement instruction's match rule
25396 //
25397 // ---------VM FLAGS---------------------------------------------------------
25398 //
25399 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25400 //
25401 // Each peephole rule is given an identifying number starting with zero and
25402 // increasing by one in the order seen by the parser. An individual peephole
25403 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25404 // on the command-line.
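//
// For example (illustrative; OptoPeephole and OptoPeepholeAt are develop
// flags, so a debug build is assumed):
//   -XX:-OptoPeephole        disables all peephole optimizations
//   -XX:OptoPeepholeAt=3     enables only the peephole rule numbered 3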
25405 //
25406 // ---------CURRENT LIMITATIONS----------------------------------------------
25407 //
// Only transformations inside a basic block (do we need more for peephole?)
25409 //
25410 // ---------EXAMPLE----------------------------------------------------------
25411 //
25412 // // pertinent parts of existing instructions in architecture description
25413 // instruct movI(rRegI dst, rRegI src)
25414 // %{
25415 // match(Set dst (CopyI src));
25416 // %}
25417 //
25418 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25419 // %{
25420 // match(Set dst (AddI dst src));
25421 // effect(KILL cr);
25422 // %}
25423 //
25424 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25425 // %{
25426 // match(Set dst (AddI dst src));
25427 // %}
25428 //
25429 // 1. Simple replacement
// - Only match adjacent instructions in the same basic block
25431 // - Only equality constraints
25432 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25433 // - Only one replacement instruction
25434 //
25435 // // Change (inc mov) to lea
25436 // peephole %{
25437 // // lea should only be emitted when beneficial
25438 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25439 // // increment preceded by register-register move
25440 // peepmatch ( incI_rReg movI );
25441 // // require that the destination register of the increment
25442 // // match the destination register of the move
25443 // peepconstraint ( 0.dst == 1.dst );
25444 // // construct a replacement instruction that sets
25445 // // the destination to ( move's source register + one )
25446 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25447 // %}
25448 //
25449 // 2. Procedural replacement
// - More flexibility in finding the relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing limitations
25455 //
25456 // // Change (inc mov) to lea
25457 // peephole %{
25458 // // lea should only be emitted when beneficial
25459 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the procedure below
25461 // peepmatch ( incI_rReg movI );
25462 // // the method that takes the responsibility of transformation
25463 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that creates this node
// // when invoked is passed into the procedure above
25466 // peepreplace ( leaI_rReg_immI() );
25467 // %}
25468
// These instructions are not matched by the matcher but are used by the peephole rules
25470 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25471 %{
25472 predicate(false);
25473 match(Set dst (AddI src1 src2));
25474 format %{ "leal $dst, [$src1 + $src2]" %}
25475 ins_encode %{
25476 Register dst = $dst$$Register;
25477 Register src1 = $src1$$Register;
25478 Register src2 = $src2$$Register;
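    // rbp and r13 cannot be encoded as a base register without a
    // displacement, so keep them in the index position instead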
25479 if (src1 != rbp && src1 != r13) {
25480 __ leal(dst, Address(src1, src2, Address::times_1));
25481 } else {
25482 assert(src2 != rbp && src2 != r13, "");
25483 __ leal(dst, Address(src2, src1, Address::times_1));
25484 }
25485 %}
25486 ins_pipe(ialu_reg_reg);
25487 %}
25488
25489 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25490 %{
25491 predicate(false);
25492 match(Set dst (AddI src1 src2));
25493 format %{ "leal $dst, [$src1 + $src2]" %}
25494 ins_encode %{
25495 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25496 %}
25497 ins_pipe(ialu_reg_reg);
25498 %}
25499
25500 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25501 %{
25502 predicate(false);
25503 match(Set dst (LShiftI src shift));
25504 format %{ "leal $dst, [$src << $shift]" %}
25505 ins_encode %{
25506 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25507 Register src = $src$$Register;
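    // prefer [src + src] over [src * 2]: a scaled index without a base
    // requires a 4-byte displacement in the encoding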
25508 if (scale == Address::times_2 && src != rbp && src != r13) {
25509 __ leal($dst$$Register, Address(src, src, Address::times_1));
25510 } else {
25511 __ leal($dst$$Register, Address(noreg, src, scale));
25512 }
25513 %}
25514 ins_pipe(ialu_reg_reg);
25515 %}
25516
25517 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25518 %{
25519 predicate(false);
25520 match(Set dst (AddL src1 src2));
25521 format %{ "leaq $dst, [$src1 + $src2]" %}
25522 ins_encode %{
25523 Register dst = $dst$$Register;
25524 Register src1 = $src1$$Register;
25525 Register src2 = $src2$$Register;
25526 if (src1 != rbp && src1 != r13) {
25527 __ leaq(dst, Address(src1, src2, Address::times_1));
25528 } else {
25529 assert(src2 != rbp && src2 != r13, "");
25530 __ leaq(dst, Address(src2, src1, Address::times_1));
25531 }
25532 %}
25533 ins_pipe(ialu_reg_reg);
25534 %}
25535
25536 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25537 %{
25538 predicate(false);
25539 match(Set dst (AddL src1 src2));
25540 format %{ "leaq $dst, [$src1 + $src2]" %}
25541 ins_encode %{
25542 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25543 %}
25544 ins_pipe(ialu_reg_reg);
25545 %}
25546
25547 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25548 %{
25549 predicate(false);
25550 match(Set dst (LShiftL src shift));
25551 format %{ "leaq $dst, [$src << $shift]" %}
25552 ins_encode %{
25553 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25554 Register src = $src$$Register;
25555 if (scale == Address::times_2 && src != rbp && src != r13) {
25556 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25557 } else {
25558 __ leaq($dst$$Register, Address(noreg, src, scale));
25559 }
25560 %}
25561 ins_pipe(ialu_reg_reg);
25562 %}
25563
// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
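//
// For instance (illustrative only; the lea_coalesce_* procedures decide
// actual applicability), the first rule below rewrites
//   movl rdx, rax
//   addl rdx, rcx
// into
//   leal rdx, [rax + rcx]
// eliminating the intermediate register-register move.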
25570
25571 peephole
25572 %{
25573 peeppredicate(VM_Version::supports_fast_2op_lea());
25574 peepmatch (addI_rReg);
25575 peepprocedure (lea_coalesce_reg);
25576 peepreplace (leaI_rReg_rReg_peep());
25577 %}
25578
25579 peephole
25580 %{
25581 peeppredicate(VM_Version::supports_fast_2op_lea());
25582 peepmatch (addI_rReg_imm);
25583 peepprocedure (lea_coalesce_imm);
25584 peepreplace (leaI_rReg_immI_peep());
25585 %}
25586
25587 peephole
25588 %{
25589 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25590 VM_Version::is_intel_cascade_lake());
25591 peepmatch (incI_rReg);
25592 peepprocedure (lea_coalesce_imm);
25593 peepreplace (leaI_rReg_immI_peep());
25594 %}
25595
25596 peephole
25597 %{
25598 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25599 VM_Version::is_intel_cascade_lake());
25600 peepmatch (decI_rReg);
25601 peepprocedure (lea_coalesce_imm);
25602 peepreplace (leaI_rReg_immI_peep());
25603 %}
25604
25605 peephole
25606 %{
25607 peeppredicate(VM_Version::supports_fast_2op_lea());
25608 peepmatch (salI_rReg_immI2);
25609 peepprocedure (lea_coalesce_imm);
25610 peepreplace (leaI_rReg_immI2_peep());
25611 %}
25612
25613 peephole
25614 %{
25615 peeppredicate(VM_Version::supports_fast_2op_lea());
25616 peepmatch (addL_rReg);
25617 peepprocedure (lea_coalesce_reg);
25618 peepreplace (leaL_rReg_rReg_peep());
25619 %}
25620
25621 peephole
25622 %{
25623 peeppredicate(VM_Version::supports_fast_2op_lea());
25624 peepmatch (addL_rReg_imm);
25625 peepprocedure (lea_coalesce_imm);
25626 peepreplace (leaL_rReg_immL32_peep());
25627 %}
25628
25629 peephole
25630 %{
25631 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25632 VM_Version::is_intel_cascade_lake());
25633 peepmatch (incL_rReg);
25634 peepprocedure (lea_coalesce_imm);
25635 peepreplace (leaL_rReg_immL32_peep());
25636 %}
25637
25638 peephole
25639 %{
25640 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25641 VM_Version::is_intel_cascade_lake());
25642 peepmatch (decL_rReg);
25643 peepprocedure (lea_coalesce_imm);
25644 peepreplace (leaL_rReg_immL32_peep());
25645 %}
25646
25647 peephole
25648 %{
25649 peeppredicate(VM_Version::supports_fast_2op_lea());
25650 peepmatch (salL_rReg_immI2);
25651 peepprocedure (lea_coalesce_imm);
25652 peepreplace (leaL_rReg_immI2_peep());
25653 %}
25654
25655 peephole
25656 %{
25657 peepmatch (leaPCompressedOopOffset);
25658 peepprocedure (lea_remove_redundant);
25659 %}
25660
25661 peephole
25662 %{
25663 peepmatch (leaP8Narrow);
25664 peepprocedure (lea_remove_redundant);
25665 %}
25666
25667 peephole
25668 %{
25669 peepmatch (leaP32Narrow);
25670 peepprocedure (lea_remove_redundant);
25671 %}
25672
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
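//
// For instance (illustrative only), in
//   andl  rax, rbx
//   testl rax, rax
//   je    L
// the testl can be removed, since andl already sets ZF/SF from the same
// result that testl would recompute.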
25675
// int variant
25677 peephole
25678 %{
25679 peepmatch (testI_reg);
25680 peepprocedure (test_may_remove);
25681 %}
25682
// long variant
25684 peephole
25685 %{
25686 peepmatch (testL_reg);
25687 peepprocedure (test_may_remove);
25688 %}
25689
25690
25691 //----------SMARTSPILL RULES---------------------------------------------------
25692 // These must follow all instruction definitions as they use the names
25693 // defined in the instructions definitions.