//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
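//
// For example, "reg_def RBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg())" below
// reads: RBX may be used without saving on entry to compiled Java code but
// must be saved at call sites (SOC), is save-on-entry under the C calling
// convention (SOE), spills as an int (Op_RegI), and has hardware encoding 3.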

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
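//
// As a rough sketch of why (an illustration only, not HotSpot's actual
// encoder): the REX prefix is the byte 0100WRXB, where the R, X, and B bits
// each carry the high bit of a 4-bit register encoding, so any encoding >= 8
// forces a REX prefix:
//
//   uint8_t rex(bool wide, int reg, int index, int base) {
//     return 0x40 | (wide << 3) | (((reg >> 3) & 1) << 2)
//                 | (((index >> 3) & 1) << 1) | ((base >> 3) & 1);
//   }
//
// Encodings 16-31 (the APX registers R16-R31 below) do not fit in REX at all
// and need the extended REX2/EVEX encodings instead.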

// Previously RBX, RSI, and RDI were set as save-on-entry for java code.
// SOE was then turned off in java code due to the frequent use of uncommon-traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

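// The chunk0 ordering below follows that heuristic: scratch registers such
// as R10 and R11 come first, registers that participate in fixed calling
// sequences come later, and RSP comes last since it is never allocatable.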
alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8, R8_H,
                   R9, R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters
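//
// Worked example of the slot layout: a Float in XMM3 occupies only the
// 32-bit word XMM3 (word a); a Double occupies XMM3 and XMM3b (words a-b);
// a full 512-bit vector uses all sixteen words XMM3 through XMM3p, i.e. the
// slots xmm3->as_VMReg() through xmm3->as_VMReg()->next(15) defined below.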

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
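// K0 is deliberately absent: in EVEX encodings a mask-register field of
// zero means "no masking", so k0 cannot be used as an allocatable writemask.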
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8, R8_H,
                  R9, R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);
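// (RSP and R15 are omitted above: RSP is the stack pointer and R15 is
// reserved as the current-thread register, so neither is allocatable here.)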

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
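// Classes written as %{ ... %} return a RegMask computed at VM startup
// rather than a fixed register list, so their contents can depend on
// runtime CPU features (for example, including the APX registers R16-R31
// only when they are actually available).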
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
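// A reg_class_dynamic (here and below) resolves to its first (EVEX) class
// when the runtime predicate holds, and to its second (legacy) class otherwise.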
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
// Returns true when both bounds of the CastLL's long type fit in a signed
// 32-bit immediate; an unbounded end (min_jlong / max_jlong) needs no
// runtime check and is accepted as well.
bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
// With compressed oops, r12 is reserved as the heap base register and must
// be kept out of the allocatable register sets.
static bool need_r12_heapbase() {
return UseCompressedOops;
}
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
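// Intel APX adds the extended GPRs r16-r31; without APX they are not
// encodable, so remove them from the allocatable set.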
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
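// When vzeroupper is emitted before a call (see clear_avx_size()), its
// 3 bytes shift the return address accordingly, hence the additions below.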
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
int offset = 5; // 5 bytes from start of call to where return address points (1 opcode byte + 4-byte displacement)
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
int offset = 15; // 15 bytes from start of call to where return address points (10-byte movq of the inline cache word + 5-byte call)
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
int offset = 13; // movq r10,#addr (10 bytes); callq (r10) (3 bytes)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
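// For example, with a 3-byte vzeroupper preceding the call and
// current_offset == 10, the displacement would start at 10 + 3 + 1 == 14;
// align_up(14, 4) == 16, so 2 bytes of padding are returned and the
// patched 4-byte displacement lands aligned at offset 16.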
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
1685 // This could be in MacroAssembler but it's fairly C2 specific
1686 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1687 Label exit;
1688 __ jccb(Assembler::noParity, exit);
1689 __ pushf();
1690 //
1691 // comiss/ucomiss instructions set ZF,PF,CF flags and
1692 // zero OF,AF,SF for NaN values.
1693 // Fixup flags by zeroing ZF,PF so that compare of NaN
1694 // values returns 'less than' result (CF is set).
1695 // Leave the rest of flags unchanged.
1696 //
1697 // 7 6 5 4 3 2 1 0
1698 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1699 // 0 0 1 0 1 0 1 1 (0x2B)
1700 //
1701 __ andq(Address(rsp, 0), 0xffffff2b);
1702 __ popf();
1703 __ bind(exit);
1704 }
1705
1706 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
// Floating point compare instructions set CF=1 both for 'below' and for the
// unordered case (at least one input is NaN), so the 'below' branch also
// covers unordered inputs. The result is -1 (less or unordered), 0 (equal)
// or +1 (greater).
1709 Label done;
1710 __ movl(dst, -1);
1711 __ jcc(Assembler::below, done);
1712 __ setcc(Assembler::notEqual, dst);
1713 __ bind(done);
1714 }
1715
1716 enum FP_PREC {
1717 fp_prec_hlf,
1718 fp_prec_flt,
1719 fp_prec_dbl
1720 };
1721
1722 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1723 XMMRegister p, XMMRegister q) {
1724 if (pt == fp_prec_hlf) {
1725 __ evucomish(p, q);
1726 } else if (pt == fp_prec_flt) {
1727 __ ucomiss(p, q);
1728 } else {
1729 __ ucomisd(p, q);
1730 }
1731 }
1732
1733 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1734 XMMRegister dst, XMMRegister src, Register scratch) {
1735 if (pt == fp_prec_hlf) {
1736 __ movhlf(dst, src, scratch);
1737 } else if (pt == fp_prec_flt) {
1738 __ movflt(dst, src);
1739 } else {
1740 __ movdbl(dst, src);
1741 }
1742 }
1743
1744 // Math.min() # Math.max()
1745 // -----------------------------
1746 // (v)ucomis[h/s/d] #
1747 // ja -> b # a
1748 // jp -> NaN # NaN
1749 // jb -> a # b
1750 // je #
1751 // |-jz -> a | b # a & b
1752 // | -> a #
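// For equal inputs the sign of zero still matters: min(-0.0, +0.0) is -0.0
// and max(-0.0, +0.0) is +0.0, hence the OR (min) / AND (max) of the operand
// bit patterns in the 'zero' path below.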
1753 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1754 XMMRegister a, XMMRegister b,
1755 XMMRegister xmmt, Register rt,
1756 bool min, enum FP_PREC pt) {
1757
1758 Label nan, zero, below, above, done;
1759
1760 emit_fp_ucom(masm, pt, a, b);
1761
1762 if (dst->encoding() != (min ? b : a)->encoding()) {
1763 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1764 } else {
1765 __ jccb(Assembler::above, done);
1766 }
1767
1768 __ jccb(Assembler::parity, nan); // PF=1
1769 __ jccb(Assembler::below, below); // CF=1
1770
1771 // equal
1772 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1773 emit_fp_ucom(masm, pt, a, xmmt);
1774
1775 __ jccb(Assembler::equal, zero);
1776 movfp(masm, pt, dst, a, rt);
1777
1778 __ jmp(done);
1779
1780 __ bind(zero);
1781 if (min) {
1782 __ vpor(dst, a, b, Assembler::AVX_128bit);
1783 } else {
1784 __ vpand(dst, a, b, Assembler::AVX_128bit);
1785 }
1786
1787 __ jmp(done);
1788
1789 __ bind(above);
1790 movfp(masm, pt, dst, min ? b : a, rt);
1791
1792 __ jmp(done);
1793
1794 __ bind(nan);
1795 if (pt == fp_prec_hlf) {
1796 __ movl(rt, 0x00007e00); // Float16.NaN
1797 __ evmovw(dst, rt);
1798 } else if (pt == fp_prec_flt) {
1799 __ movl(rt, 0x7fc00000); // Float.NaN
1800 __ movdl(dst, rt);
1801 } else {
1802 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1803 __ movdq(dst, rt);
1804 }
1805 __ jmp(done);
1806
1807 __ bind(below);
1808 movfp(masm, pt, dst, min ? a : b, rt);
1809
1810 __ bind(done);
1811 }
1812
1813 //=============================================================================
1814 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1815
1816 int ConstantTable::calculate_table_base_offset() const {
1817 return 0; // absolute addressing, no offset
1818 }
1819
1820 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1821 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1822 ShouldNotReachHere();
1823 }
1824
1825 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1826 // Empty encoding
1827 }
1828
1829 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1830 return 0;
1831 }
1832
1833 #ifndef PRODUCT
1834 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1835 st->print("# MachConstantBaseNode (empty encoding)");
1836 }
1837 #endif
1838
1839
1840 //=============================================================================
1841 #ifndef PRODUCT
1842 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1843 Compile* C = ra_->C;
1844
1845 int framesize = C->output()->frame_size_in_bytes();
1846 int bangsize = C->output()->bang_size_in_bytes();
1847 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1848 // Remove wordSize for return addr which is already pushed.
1849 framesize -= wordSize;
1850
1851 if (C->output()->need_stack_bang(bangsize)) {
1852 framesize -= wordSize;
1853 st->print("# stack bang (%d bytes)", bangsize);
1854 st->print("\n\t");
1855 st->print("pushq rbp\t# Save rbp");
1856 if (PreserveFramePointer) {
1857 st->print("\n\t");
1858 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1859 }
1860 if (framesize) {
1861 st->print("\n\t");
1862 st->print("subq rsp, #%d\t# Create frame",framesize);
1863 }
1864 } else {
1865 st->print("subq rsp, #%d\t# Create frame",framesize);
1866 st->print("\n\t");
1867 framesize -= wordSize;
1868 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1869 if (PreserveFramePointer) {
1870 st->print("\n\t");
1871 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1872 if (framesize > 0) {
1873 st->print("\n\t");
1874 st->print("addq rbp, #%d", framesize);
1875 }
1876 }
1877 }
1878
1879 if (VerifyStackAtCalls) {
1880 st->print("\n\t");
1881 framesize -= wordSize;
1882 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1883 #ifdef ASSERT
1884 st->print("\n\t");
1885 st->print("# stack alignment check");
1886 #endif
1887 }
1888 if (C->stub_function() != nullptr) {
1889 st->print("\n\t");
1890 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1891 st->print("\n\t");
1892 st->print("je fast_entry\t");
1893 st->print("\n\t");
1894 st->print("call #nmethod_entry_barrier_stub\t");
1895 st->print("\n\tfast_entry:");
1896 }
1897 st->cr();
1898 }
1899 #endif
1900
1901 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1902 Compile* C = ra_->C;
1903
1904 __ verified_entry(C);
1905
1906 if (ra_->C->stub_function() == nullptr) {
1907 __ entry_barrier();
1908 }
1909
1910 if (!Compile::current()->output()->in_scratch_emit_size()) {
1911 __ bind(*_verified_entry);
1912 }
1913
1914 C->output()->set_frame_complete(__ offset());
1915
1916 if (C->has_mach_constant_base_node()) {
// NOTE: We set the table base offset here because users of the constant
// table might be emitted before MachConstantBaseNode.
1919 ConstantTable& constant_table = C->output()->constant_table();
1920 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1921 }
1922 }
1923
1924
1925 int MachPrologNode::reloc() const
1926 {
1927 return 0; // a large enough number
1928 }
1929
1930 //=============================================================================
1931 #ifndef PRODUCT
1932 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1933 {
1934 Compile* C = ra_->C;
1935 if (generate_vzeroupper(C)) {
1936 st->print("vzeroupper");
1937 st->cr(); st->print("\t");
1938 }
1939
1940 int framesize = C->output()->frame_size_in_bytes();
1941 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
// Remove a word for the return address, which is already pushed,
// and one for RBP
1944 framesize -= 2*wordSize;
1945
1946 if (framesize) {
1947 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1948 st->print("\t");
1949 }
1950
1951 st->print_cr("popq rbp");
1952 if (do_polling() && C->is_method_compilation()) {
1953 st->print("\t");
1954 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1955 "ja #safepoint_stub\t"
1956 "# Safepoint: poll for GC");
1957 }
1958 }
1959 #endif
1960
1961 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1962 {
1963 Compile* C = ra_->C;
1964
1965 if (generate_vzeroupper(C)) {
1966 // Clear upper bits of YMM registers when current compiled code uses
1967 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1968 __ vzeroupper();
1969 }
1970
1971 // Subtract two words to account for return address and rbp
1972 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
1973 __ remove_frame(initial_framesize, C->needs_stack_repair());
1974
1975 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1976 __ reserved_stack_check();
1977 }
1978
1979 if (do_polling() && C->is_method_compilation()) {
1980 Label dummy_label;
1981 Label* code_stub = &dummy_label;
1982 if (!C->output()->in_scratch_emit_size()) {
1983 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1984 C->output()->add_stub(stub);
1985 code_stub = &stub->entry();
1986 }
1987 __ relocate(relocInfo::poll_return_type);
1988 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1989 }
1990 }
1991
1992 int MachEpilogNode::reloc() const
1993 {
1994 return 2; // a large enough number
1995 }
1996
1997 const Pipeline* MachEpilogNode::pipeline() const
1998 {
1999 return MachNode::pipeline_class();
2000 }
2001
2002 //=============================================================================
2003
2004 enum RC {
2005 rc_bad,
2006 rc_int,
2007 rc_kreg,
2008 rc_float,
2009 rc_stack
2010 };
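
// Map an OptoReg to its register class; MachSpillCopyNode::implementation
// below uses this to choose the appropriate move or spill flavor.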
2011
2012 static enum RC rc_class(OptoReg::Name reg)
2013 {
if (!OptoReg::is_valid(reg)) return rc_bad;
2015
2016 if (OptoReg::is_stack(reg)) return rc_stack;
2017
2018 VMReg r = OptoReg::as_VMReg(reg);
2019
2020 if (r->is_Register()) return rc_int;
2021
2022 if (r->is_KRegister()) return rc_kreg;
2023
2024 assert(r->is_XMMRegister(), "must be");
2025 return rc_float;
2026 }
2027
2028 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2029 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2030 int src_hi, int dst_hi, uint ireg, outputStream* st);
2031
2032 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2033 int stack_offset, int reg, uint ireg, outputStream* st);
2034
2035 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2036 int dst_offset, uint ireg, outputStream* st) {
2037 if (masm) {
2038 switch (ireg) {
2039 case Op_VecS:
2040 __ movq(Address(rsp, -8), rax);
2041 __ movl(rax, Address(rsp, src_offset));
2042 __ movl(Address(rsp, dst_offset), rax);
2043 __ movq(rax, Address(rsp, -8));
2044 break;
2045 case Op_VecD:
2046 __ pushq(Address(rsp, src_offset));
2047 __ popq (Address(rsp, dst_offset));
2048 break;
2049 case Op_VecX:
2050 __ pushq(Address(rsp, src_offset));
2051 __ popq (Address(rsp, dst_offset));
2052 __ pushq(Address(rsp, src_offset+8));
2053 __ popq (Address(rsp, dst_offset+8));
2054 break;
2055 case Op_VecY:
2056 __ vmovdqu(Address(rsp, -32), xmm0);
2057 __ vmovdqu(xmm0, Address(rsp, src_offset));
2058 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2059 __ vmovdqu(xmm0, Address(rsp, -32));
2060 break;
2061 case Op_VecZ:
2062 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2063 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2064 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2065 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2066 break;
2067 default:
2068 ShouldNotReachHere();
2069 }
2070 #ifndef PRODUCT
2071 } else {
2072 switch (ireg) {
2073 case Op_VecS:
2074 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2075 "movl rax, [rsp + #%d]\n\t"
2076 "movl [rsp + #%d], rax\n\t"
2077 "movq rax, [rsp - #8]",
2078 src_offset, dst_offset);
2079 break;
2080 case Op_VecD:
2081 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2082 "popq [rsp + #%d]",
2083 src_offset, dst_offset);
2084 break;
2085 case Op_VecX:
2086 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2087 "popq [rsp + #%d]\n\t"
2088 "pushq [rsp + #%d]\n\t"
2089 "popq [rsp + #%d]",
2090 src_offset, dst_offset, src_offset+8, dst_offset+8);
2091 break;
2092 case Op_VecY:
2093 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2094 "vmovdqu xmm0, [rsp + #%d]\n\t"
2095 "vmovdqu [rsp + #%d], xmm0\n\t"
2096 "vmovdqu xmm0, [rsp - #32]",
2097 src_offset, dst_offset);
2098 break;
2099 case Op_VecZ:
2100 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2101 "vmovdqu xmm0, [rsp + #%d]\n\t"
2102 "vmovdqu [rsp + #%d], xmm0\n\t"
2103 "vmovdqu xmm0, [rsp - #64]",
2104 src_offset, dst_offset);
2105 break;
2106 default:
2107 ShouldNotReachHere();
2108 }
2109 #endif
2110 }
2111 }
2112
2113 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2114 PhaseRegAlloc* ra_,
2115 bool do_size,
2116 outputStream* st) const {
2117 assert(masm != nullptr || st != nullptr, "sanity");
2118 // Get registers to move
2119 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2120 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2121 OptoReg::Name dst_second = ra_->get_reg_second(this);
2122 OptoReg::Name dst_first = ra_->get_reg_first(this);
2123
2124 enum RC src_second_rc = rc_class(src_second);
2125 enum RC src_first_rc = rc_class(src_first);
2126 enum RC dst_second_rc = rc_class(dst_second);
2127 enum RC dst_first_rc = rc_class(dst_first);
2128
2129 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2130 "must move at least 1 register" );
2131
2132 if (src_first == dst_first && src_second == dst_second) {
2133 // Self copy, no move
2134 return 0;
2135 }
2136 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2137 uint ireg = ideal_reg();
2138 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2139 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2141 // mem -> mem
2142 int src_offset = ra_->reg2offset(src_first);
2143 int dst_offset = ra_->reg2offset(dst_first);
2144 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
int stack_offset = ra_->reg2offset(dst_first);
vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
2151 int stack_offset = ra_->reg2offset(src_first);
2152 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2153 } else {
2154 ShouldNotReachHere();
2155 }
2156 return 0;
2157 }
2158 if (src_first_rc == rc_stack) {
2159 // mem ->
2160 if (dst_first_rc == rc_stack) {
2161 // mem -> mem
2162 assert(src_second != dst_first, "overlap");
2163 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2164 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2165 // 64-bit
2166 int src_offset = ra_->reg2offset(src_first);
2167 int dst_offset = ra_->reg2offset(dst_first);
2168 if (masm) {
2169 __ pushq(Address(rsp, src_offset));
2170 __ popq (Address(rsp, dst_offset));
2171 #ifndef PRODUCT
2172 } else {
2173 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2174 "popq [rsp + #%d]",
2175 src_offset, dst_offset);
2176 #endif
2177 }
2178 } else {
2179 // 32-bit
2180 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2181 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2182 // No pushl/popl, so:
2183 int src_offset = ra_->reg2offset(src_first);
2184 int dst_offset = ra_->reg2offset(dst_first);
2185 if (masm) {
2186 __ movq(Address(rsp, -8), rax);
2187 __ movl(rax, Address(rsp, src_offset));
2188 __ movl(Address(rsp, dst_offset), rax);
2189 __ movq(rax, Address(rsp, -8));
2190 #ifndef PRODUCT
2191 } else {
2192 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2193 "movl rax, [rsp + #%d]\n\t"
2194 "movl [rsp + #%d], rax\n\t"
2195 "movq rax, [rsp - #8]",
2196 src_offset, dst_offset);
2197 #endif
2198 }
2199 }
2200 return 0;
2201 } else if (dst_first_rc == rc_int) {
2202 // mem -> gpr
2203 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2204 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2205 // 64-bit
2206 int offset = ra_->reg2offset(src_first);
2207 if (masm) {
2208 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2209 #ifndef PRODUCT
2210 } else {
2211 st->print("movq %s, [rsp + #%d]\t# spill",
2212 Matcher::regName[dst_first],
2213 offset);
2214 #endif
2215 }
2216 } else {
2217 // 32-bit
2218 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2219 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2220 int offset = ra_->reg2offset(src_first);
2221 if (masm) {
2222 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2223 #ifndef PRODUCT
2224 } else {
2225 st->print("movl %s, [rsp + #%d]\t# spill",
2226 Matcher::regName[dst_first],
2227 offset);
2228 #endif
2229 }
2230 }
2231 return 0;
2232 } else if (dst_first_rc == rc_float) {
2233 // mem-> xmm
2234 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2235 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2236 // 64-bit
2237 int offset = ra_->reg2offset(src_first);
2238 if (masm) {
2239 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2240 #ifndef PRODUCT
2241 } else {
2242 st->print("%s %s, [rsp + #%d]\t# spill",
2243 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2244 Matcher::regName[dst_first],
2245 offset);
2246 #endif
2247 }
2248 } else {
2249 // 32-bit
2250 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2251 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2252 int offset = ra_->reg2offset(src_first);
2253 if (masm) {
2254 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2255 #ifndef PRODUCT
2256 } else {
2257 st->print("movss %s, [rsp + #%d]\t# spill",
2258 Matcher::regName[dst_first],
2259 offset);
2260 #endif
2261 }
2262 }
2263 return 0;
2264 } else if (dst_first_rc == rc_kreg) {
2265 // mem -> kreg
2266 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2267 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2268 // 64-bit
2269 int offset = ra_->reg2offset(src_first);
2270 if (masm) {
2271 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2272 #ifndef PRODUCT
2273 } else {
2274 st->print("kmovq %s, [rsp + #%d]\t# spill",
2275 Matcher::regName[dst_first],
2276 offset);
2277 #endif
2278 }
2279 }
2280 return 0;
2281 }
2282 } else if (src_first_rc == rc_int) {
2283 // gpr ->
2284 if (dst_first_rc == rc_stack) {
2285 // gpr -> mem
2286 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2287 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2288 // 64-bit
2289 int offset = ra_->reg2offset(dst_first);
2290 if (masm) {
2291 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2292 #ifndef PRODUCT
2293 } else {
2294 st->print("movq [rsp + #%d], %s\t# spill",
2295 offset,
2296 Matcher::regName[src_first]);
2297 #endif
2298 }
2299 } else {
2300 // 32-bit
2301 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2302 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2303 int offset = ra_->reg2offset(dst_first);
2304 if (masm) {
2305 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2306 #ifndef PRODUCT
2307 } else {
2308 st->print("movl [rsp + #%d], %s\t# spill",
2309 offset,
2310 Matcher::regName[src_first]);
2311 #endif
2312 }
2313 }
2314 return 0;
2315 } else if (dst_first_rc == rc_int) {
2316 // gpr -> gpr
2317 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2318 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2319 // 64-bit
2320 if (masm) {
2321 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2322 as_Register(Matcher::_regEncode[src_first]));
2323 #ifndef PRODUCT
2324 } else {
2325 st->print("movq %s, %s\t# spill",
2326 Matcher::regName[dst_first],
2327 Matcher::regName[src_first]);
2328 #endif
2329 }
2330 return 0;
2331 } else {
2332 // 32-bit
2333 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2334 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2335 if (masm) {
2336 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2337 as_Register(Matcher::_regEncode[src_first]));
2338 #ifndef PRODUCT
2339 } else {
2340 st->print("movl %s, %s\t# spill",
2341 Matcher::regName[dst_first],
2342 Matcher::regName[src_first]);
2343 #endif
2344 }
2345 return 0;
2346 }
2347 } else if (dst_first_rc == rc_float) {
2348 // gpr -> xmm
2349 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2350 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2351 // 64-bit
2352 if (masm) {
2353 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2354 #ifndef PRODUCT
2355 } else {
2356 st->print("movdq %s, %s\t# spill",
2357 Matcher::regName[dst_first],
2358 Matcher::regName[src_first]);
2359 #endif
2360 }
2361 } else {
2362 // 32-bit
2363 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2364 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2365 if (masm) {
2366 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2367 #ifndef PRODUCT
2368 } else {
2369 st->print("movdl %s, %s\t# spill",
2370 Matcher::regName[dst_first],
2371 Matcher::regName[src_first]);
2372 #endif
2373 }
2374 }
2375 return 0;
2376 } else if (dst_first_rc == rc_kreg) {
2377 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2378 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2379 // 64-bit
2380 if (masm) {
2381 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2382 #ifndef PRODUCT
2383 } else {
2384 st->print("kmovq %s, %s\t# spill",
2385 Matcher::regName[dst_first],
2386 Matcher::regName[src_first]);
2387 #endif
2388 }
2389 }
2390 Unimplemented();
2391 return 0;
2392 }
2393 } else if (src_first_rc == rc_float) {
2394 // xmm ->
2395 if (dst_first_rc == rc_stack) {
2396 // xmm -> mem
2397 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2398 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2399 // 64-bit
2400 int offset = ra_->reg2offset(dst_first);
2401 if (masm) {
2402 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2403 #ifndef PRODUCT
2404 } else {
2405 st->print("movsd [rsp + #%d], %s\t# spill",
2406 offset,
2407 Matcher::regName[src_first]);
2408 #endif
2409 }
2410 } else {
2411 // 32-bit
2412 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2413 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2414 int offset = ra_->reg2offset(dst_first);
2415 if (masm) {
2416 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2417 #ifndef PRODUCT
2418 } else {
2419 st->print("movss [rsp + #%d], %s\t# spill",
2420 offset,
2421 Matcher::regName[src_first]);
2422 #endif
2423 }
2424 }
2425 return 0;
2426 } else if (dst_first_rc == rc_int) {
2427 // xmm -> gpr
2428 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2429 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2430 // 64-bit
2431 if (masm) {
2432 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2433 #ifndef PRODUCT
2434 } else {
2435 st->print("movdq %s, %s\t# spill",
2436 Matcher::regName[dst_first],
2437 Matcher::regName[src_first]);
2438 #endif
2439 }
2440 } else {
2441 // 32-bit
2442 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2443 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2444 if (masm) {
2445 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2446 #ifndef PRODUCT
2447 } else {
2448 st->print("movdl %s, %s\t# spill",
2449 Matcher::regName[dst_first],
2450 Matcher::regName[src_first]);
2451 #endif
2452 }
2453 }
2454 return 0;
2455 } else if (dst_first_rc == rc_float) {
2456 // xmm -> xmm
2457 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2458 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2459 // 64-bit
2460 if (masm) {
2461 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2462 #ifndef PRODUCT
2463 } else {
2464 st->print("%s %s, %s\t# spill",
2465 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2466 Matcher::regName[dst_first],
2467 Matcher::regName[src_first]);
2468 #endif
2469 }
2470 } else {
2471 // 32-bit
2472 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2473 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2474 if (masm) {
2475 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2476 #ifndef PRODUCT
2477 } else {
2478 st->print("%s %s, %s\t# spill",
2479 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2480 Matcher::regName[dst_first],
2481 Matcher::regName[src_first]);
2482 #endif
2483 }
2484 }
2485 return 0;
2486 } else if (dst_first_rc == rc_kreg) {
2487 assert(false, "Illegal spilling");
2488 return 0;
2489 }
2490 } else if (src_first_rc == rc_kreg) {
2491 if (dst_first_rc == rc_stack) {
// kreg -> mem
2493 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2494 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2495 // 64-bit
2496 int offset = ra_->reg2offset(dst_first);
2497 if (masm) {
2498 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2499 #ifndef PRODUCT
2500 } else {
2501 st->print("kmovq [rsp + #%d] , %s\t# spill",
2502 offset,
2503 Matcher::regName[src_first]);
2504 #endif
2505 }
2506 }
2507 return 0;
2508 } else if (dst_first_rc == rc_int) {
2509 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2510 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2511 // 64-bit
2512 if (masm) {
2513 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2514 #ifndef PRODUCT
2515 } else {
2516 st->print("kmovq %s, %s\t# spill",
2517 Matcher::regName[dst_first],
2518 Matcher::regName[src_first]);
2519 #endif
2520 }
2521 }
2522 Unimplemented();
2523 return 0;
2524 } else if (dst_first_rc == rc_kreg) {
2525 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2526 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2527 // 64-bit
2528 if (masm) {
2529 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2530 #ifndef PRODUCT
2531 } else {
2532 st->print("kmovq %s, %s\t# spill",
2533 Matcher::regName[dst_first],
2534 Matcher::regName[src_first]);
2535 #endif
2536 }
2537 }
2538 return 0;
2539 } else if (dst_first_rc == rc_float) {
2540 assert(false, "Illegal spill");
2541 return 0;
2542 }
2543 }
2544
assert(false, "unhandled spill combination");
2546 Unimplemented();
2547 return 0;
2548 }
2549
2550 #ifndef PRODUCT
2551 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2552 implementation(nullptr, ra_, false, st);
2553 }
2554 #endif
2555
2556 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2557 implementation(masm, ra_, false, nullptr);
2558 }
2559
2560 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2561 return MachNode::size(ra_);
2562 }
2563
2564 //=============================================================================
2565 #ifndef PRODUCT
2566 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2567 {
2568 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2569 int reg = ra_->get_reg_first(this);
2570 st->print("leaq %s, [rsp + #%d]\t# box lock",
2571 Matcher::regName[reg], offset);
2572 }
2573 #endif
2574
2575 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2576 {
2577 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2578 int reg = ra_->get_encode(this);
2579
2580 __ lea(as_Register(reg), Address(rsp, offset));
2581 }
2582
2583 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2584 {
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
if (ra_->get_encode(this) > 15) {
return (offset < 0x80) ? 6 : 9; // REX2 (2 bytes) + opcode + ModRM + SIB + disp8/disp32
} else {
return (offset < 0x80) ? 5 : 8; // REX + opcode + ModRM + SIB + disp8/disp32
}
2590 }
2591 }
2592
2593 //=============================================================================
2594 #ifndef PRODUCT
2595 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2596 {
2597 st->print_cr("MachVEPNode");
2598 }
2599 #endif
2600
2601 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2602 {
2603 CodeBuffer* cbuf = masm->code();
2604 uint insts_size = cbuf->insts_size();
2605 if (!_verified) {
2606 __ ic_check(1);
2607 } else {
2608 if (ra_->C->stub_function() == nullptr) {
2609 // Emit the entry barrier in a temporary frame before unpacking because
2610 // it can deopt, which would require packing the scalarized args again.
2611 __ verified_entry(ra_->C, 0);
2612 __ entry_barrier();
2613 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2614 __ remove_frame(initial_framesize, false);
2615 }
2616 // Unpack inline type args passed as oop and then jump to
2617 // the verified entry point (skipping the unverified entry).
2618 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2619 // Emit code for verified entry and save increment for stack repair on return
2620 __ verified_entry(ra_->C, sp_inc);
2621 if (Compile::current()->output()->in_scratch_emit_size()) {
2622 Label dummy_verified_entry;
2623 __ jmp(dummy_verified_entry);
2624 } else {
2625 __ jmp(*_verified_entry);
2626 }
2627 }
/* WARNING: these NOPs are critical so that the verified entry point is
properly 4-byte aligned for patching by NativeJump::patch_verified_entry() */
2630 int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
2631 nops_cnt &= 0x3; // Do not add nops if code is aligned.
2632 if (nops_cnt > 0) {
2633 __ nop(nops_cnt);
2634 }
2635 }
2636
2637 //=============================================================================
2638 #ifndef PRODUCT
2639 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2640 {
2641 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2642 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2643 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2644 }
2645 #endif
2646
2647 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2648 {
2649 __ ic_check(InteriorEntryAlignment);
2650 }
2651
2652
2653 //=============================================================================
2654
2655 bool Matcher::supports_vector_calling_convention(void) {
2656 return EnableVectorSupport;
2657 }
2658
2659 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2660 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2661 }
2662
2663 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2664 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2665 }
2666
2667 #ifdef ASSERT
2668 static bool is_ndd_demotable(const MachNode* mdef) {
2669 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2670 }
2671 #endif
2672
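// An informal NDD demotion sketch (assumed registers, Intel APX syntax):
//   addl r10d, r10d, r9d    // 3-operand NDD form, 4-byte extended EVEX prefix
// can be demoted by the assembler to the equivalent 2-operand legacy form
//   addl r10d, r9d          // shorter REX/REX2 encoding
// once the allocator biases the definition onto a demotable source operand.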
2673 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2674 int oper_index) {
2675 if (mdef == nullptr) {
2676 return false;
2677 }
2678
2679 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2680 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2681 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2682 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2683 return false;
2684 }
2685
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2689 if (mdef->operand_num_edges(oper_index) != 1) {
2690 return false;
2691 }
2692
  // A demotion candidate must be register-mask compatible with the definition.
2694 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2695 if (!oper_mask.overlap(mdef->out_RegMask())) {
2696 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2697 return false;
2698 }
2699
2700 switch (oper_index) {
2701 // First operand of MachNode corresponding to Intel APX NDD selection
2702 // pattern can share its assigned register with definition operand if
2703 // their live ranges do not overlap. In such a scenario we can demote
2704 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2705 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2706 // are decorated with a special flag by instruction selector.
2707 case 1:
2708 return is_ndd_demotable_opr1(mdef);
2709
    // The definition operand of a commutative operation can be biased towards
    // the second operand.
2712 case 2:
2713 return is_ndd_demotable_opr2(mdef);
2714
    // The current scheme only selects up to two biasing candidates.
2716 default:
2717 assert(false, "unhandled operand index: %s", mdef->Name());
2718 break;
2719 }
2720
2721 return false;
2722 }
2723
2724 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2725 assert(EnableVectorSupport, "sanity");
2726 int lo = XMM0_num;
2727 int hi = XMM0b_num;
2728 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2729 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2730 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2731 return OptoRegPair(hi, lo);
2732 }
2733
2734 // Is this branch offset short enough that a short branch can be used?
2735 //
2736 // NOTE: If the platform does not provide any short branch variants, then
2737 // this method should return false for offset 0.
2738 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2742 offset -= br_size;
2743
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
2746 if (rule == jmpConUCF2_rule)
2747 return (-126 <= offset && offset <= 125);
2748 return (-128 <= offset && offset <= 127);
2749 }
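// Worked example with illustrative numbers: a 2-byte short jcc at code offset
// 100 targeting offset 40 is passed offset = -60; relative to the next
// instruction this becomes -62, which fits the signed 8-bit displacement
// range [-128, 127], so the short form is usable.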
2750
2751 #ifdef ASSERT
2752 // Return whether or not this register is ever used as an argument.
2753 bool Matcher::can_be_java_arg(int reg)
2754 {
2755 return
2756 reg == RDI_num || reg == RDI_H_num ||
2757 reg == RSI_num || reg == RSI_H_num ||
2758 reg == RDX_num || reg == RDX_H_num ||
2759 reg == RCX_num || reg == RCX_H_num ||
2760 reg == R8_num || reg == R8_H_num ||
2761 reg == R9_num || reg == R9_H_num ||
2762 reg == R12_num || reg == R12_H_num ||
2763 reg == XMM0_num || reg == XMM0b_num ||
2764 reg == XMM1_num || reg == XMM1b_num ||
2765 reg == XMM2_num || reg == XMM2b_num ||
2766 reg == XMM3_num || reg == XMM3b_num ||
2767 reg == XMM4_num || reg == XMM4b_num ||
2768 reg == XMM5_num || reg == XMM5b_num ||
2769 reg == XMM6_num || reg == XMM6b_num ||
2770 reg == XMM7_num || reg == XMM7b_num;
2771 }
2772 #endif
2773
2774 uint Matcher::int_pressure_limit()
2775 {
2776 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2777 }
2778
2779 uint Matcher::float_pressure_limit()
2780 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2783 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2784 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2785 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2786 }
2787
2788 // Register for DIVI projection of divmodI
2789 const RegMask& Matcher::divI_proj_mask() {
2790 return INT_RAX_REG_mask();
2791 }
2792
2793 // Register for MODI projection of divmodI
2794 const RegMask& Matcher::modI_proj_mask() {
2795 return INT_RDX_REG_mask();
2796 }
2797
2798 // Register for DIVL projection of divmodL
2799 const RegMask& Matcher::divL_proj_mask() {
2800 return LONG_RAX_REG_mask();
2801 }
2802
2803 // Register for MODL projection of divmodL
2804 const RegMask& Matcher::modL_proj_mask() {
2805 return LONG_RDX_REG_mask();
2806 }
2807
2808 %}
2809
2810 source_hpp %{
2811 // Header information of the source block.
2812 // Method declarations/definitions which are used outside
2813 // the ad-scope can conveniently be defined here.
2814 //
2815 // To keep related declarations/definitions/uses close together,
2816 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2817
2818 #include "runtime/vm_version.hpp"
2819
2820 class NativeJump;
2821
2822 class CallStubImpl {
2823
2824 //--------------------------------------------------------------
2825 //---< Used for optimization in Compile::shorten_branches >---
2826 //--------------------------------------------------------------
2827
2828 public:
2829 // Size of call trampoline stub.
2830 static uint size_call_trampoline() {
2831 return 0; // no call trampolines on this platform
2832 }
2833
2834 // number of relocations needed by a call trampoline stub
2835 static uint reloc_call_trampoline() {
2836 return 0; // no call trampolines on this platform
2837 }
2838 };
2839
2840 class HandlerImpl {
2841
2842 public:
2843
2844 static int emit_deopt_handler(C2_MacroAssembler* masm);
2845
2846 static uint size_deopt_handler() {
2847 // one call and one jmp.
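    // A sketch of the expected encoding (assumes the deopt blob is
    // rel32-reachable): call rel32 = 5 bytes, short jmp rel8 = 2 bytes.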
2848 return 7;
2849 }
2850 };
2851
2852 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
2854 case 4: // fall-through
2855 case 8: // fall-through
2856 case 16: return Assembler::AVX_128bit;
2857 case 32: return Assembler::AVX_256bit;
2858 case 64: return Assembler::AVX_512bit;
2859
2860 default: {
2861 ShouldNotReachHere();
2862 return Assembler::AVX_NoVec;
2863 }
2864 }
2865 }
2866
2867 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2868 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2869 }
2870
2871 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2872 uint def_idx = use->operand_index(opnd);
2873 Node* def = use->in(def_idx);
2874 return vector_length_encoding(def);
2875 }
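// Example usage of the helpers above (a sketch; n, dst, src1, src2 are
// hypothetical):
//   int vlen_enc = vector_length_encoding(n); // 32-byte vector -> Assembler::AVX_256bit
//   __ vpaddd(dst, src1, src2, vlen_enc);
// i.e. the encoding is passed straight through as the assembler's vector_len
// argument.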
2876
2877 static inline bool is_vector_popcount_predicate(BasicType bt) {
2878 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2879 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2880 }
2881
2882 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2883 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2884 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2885 }
2886
2887 class Node::PD {
2888 public:
2889 enum NodeFlags : uint64_t {
2890 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2891 Flag_sets_carry_flag = Node::_last_flag << 2,
2892 Flag_sets_parity_flag = Node::_last_flag << 3,
2893 Flag_sets_zero_flag = Node::_last_flag << 4,
2894 Flag_sets_overflow_flag = Node::_last_flag << 5,
2895 Flag_sets_sign_flag = Node::_last_flag << 6,
2896 Flag_clears_carry_flag = Node::_last_flag << 7,
2897 Flag_clears_parity_flag = Node::_last_flag << 8,
2898 Flag_clears_zero_flag = Node::_last_flag << 9,
2899 Flag_clears_overflow_flag = Node::_last_flag << 10,
2900 Flag_clears_sign_flag = Node::_last_flag << 11,
2901 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2902 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2903 _last_flag = Flag_ndd_demotable_opr2
2904 };
2905 };
2906
2907 %} // end source_hpp
2908
2909 source %{
2910
2911 #include "opto/addnode.hpp"
2912 #include "c2_intelJccErratum_x86.hpp"
2913
2914 void PhaseOutput::pd_perform_mach_node_analysis() {
2915 if (VM_Version::has_intel_jcc_erratum()) {
2916 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2917 _buf_sizes._code += extra_padding;
2918 }
2919 }
2920
2921 int MachNode::pd_alignment_required() const {
2922 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2923 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2924 return IntelJccErratum::largest_jcc_size() + 1;
2925 } else {
2926 return 1;
2927 }
2928 }
2929
2930 int MachNode::compute_padding(int current_offset) const {
2931 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2932 Compile* C = Compile::current();
2933 PhaseOutput* output = C->output();
2934 Block* block = output->block();
2935 int index = output->index();
2936 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2937 } else {
2938 return 0;
2939 }
2940 }
2941
2942 // Emit deopt handler code.
2943 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2944
2945 // Note that the code buffer's insts_mark is always relative to insts.
2946 // That's why we must use the macroassembler to generate a handler.
2947 address base = __ start_a_stub(size_deopt_handler());
2948 if (base == nullptr) {
2949 ciEnv::current()->record_failure("CodeCache is full");
2950 return 0; // CodeBuffer::expand failed
2951 }
2952 int offset = __ offset();
2953
2954 Label start;
2955 __ bind(start);
2956
2957 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2958
2959 int entry_offset = __ offset();
2960
2961 __ jmp(start);
2962
2963 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2964 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2965 "out of bounds read in post-call NOP check");
2966 __ end_a_stub();
2967 return entry_offset;
2968 }
2969
2970 static Assembler::Width widthForType(BasicType bt) {
2971 if (bt == T_BYTE) {
2972 return Assembler::B;
2973 } else if (bt == T_SHORT) {
2974 return Assembler::W;
2975 } else if (bt == T_INT) {
2976 return Assembler::D;
2977 } else {
2978 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2979 return Assembler::Q;
2980 }
2981 }
2982
2983 //=============================================================================
2984
2985 // Float masks come from different places depending on platform.
2986 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2987 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2988 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2989 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2990 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2991 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2992 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2993 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2994 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2995 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2996 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2997 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2998 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2999 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
3000 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
3001 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
3002 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
3003 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
3004 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3005
3006 //=============================================================================
3007 bool Matcher::match_rule_supported(int opcode) {
3008 if (!has_match_rule(opcode)) {
3009 return false; // no match rule present
3010 }
3011 switch (opcode) {
3012 case Op_AbsVL:
3013 case Op_StoreVectorScatter:
3014 if (UseAVX < 3) {
3015 return false;
3016 }
3017 break;
3018 case Op_PopCountI:
3019 case Op_PopCountL:
3020 if (!UsePopCountInstruction) {
3021 return false;
3022 }
3023 break;
3024 case Op_PopCountVI:
3025 if (UseAVX < 2) {
3026 return false;
3027 }
3028 break;
3029 case Op_CompressV:
3030 case Op_ExpandV:
3031 case Op_PopCountVL:
3032 if (UseAVX < 2) {
3033 return false;
3034 }
3035 break;
3036 case Op_MulVI:
3037 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3038 return false;
3039 }
3040 break;
3041 case Op_MulVL:
3042 if (UseSSE < 4) { // only with SSE4_1 or AVX
3043 return false;
3044 }
3045 break;
3046 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3048 return false;
3049 }
3050 break;
3051 case Op_AbsVB:
3052 case Op_AbsVS:
3053 case Op_AbsVI:
3054 case Op_AddReductionVI:
3055 case Op_AndReductionV:
3056 case Op_OrReductionV:
3057 case Op_XorReductionV:
3058 if (UseSSE < 3) { // requires at least SSSE3
3059 return false;
3060 }
3061 break;
3062 case Op_MaxHF:
3063 case Op_MinHF:
3064 if (!VM_Version::supports_avx512vlbw()) {
3065 return false;
3066 } // fallthrough
3067 case Op_AddHF:
3068 case Op_DivHF:
3069 case Op_FmaHF:
3070 case Op_MulHF:
3071 case Op_ReinterpretS2HF:
3072 case Op_ReinterpretHF2S:
3073 case Op_SubHF:
3074 case Op_SqrtHF:
3075 if (!VM_Version::supports_avx512_fp16()) {
3076 return false;
3077 }
3078 break;
3079 case Op_VectorLoadShuffle:
3080 case Op_VectorRearrange:
3081 case Op_MulReductionVI:
3082 if (UseSSE < 4) { // requires at least SSE4
3083 return false;
3084 }
3085 break;
3086 case Op_IsInfiniteF:
3087 case Op_IsInfiniteD:
3088 if (!VM_Version::supports_avx512dq()) {
3089 return false;
3090 }
3091 break;
3092 case Op_SqrtVD:
3093 case Op_SqrtVF:
3094 case Op_VectorMaskCmp:
3095 case Op_VectorCastB2X:
3096 case Op_VectorCastS2X:
3097 case Op_VectorCastI2X:
3098 case Op_VectorCastL2X:
3099 case Op_VectorCastF2X:
3100 case Op_VectorCastD2X:
3101 case Op_VectorUCastB2X:
3102 case Op_VectorUCastS2X:
3103 case Op_VectorUCastI2X:
3104 case Op_VectorMaskCast:
3105 if (UseAVX < 1) { // enabled for AVX only
3106 return false;
3107 }
3108 break;
3109 case Op_PopulateIndex:
3110 if (UseAVX < 2) {
3111 return false;
3112 }
3113 break;
3114 case Op_RoundVF:
3115 if (UseAVX < 2) { // enabled for AVX2 only
3116 return false;
3117 }
3118 break;
3119 case Op_RoundVD:
3120 if (UseAVX < 3) {
3121 return false; // enabled for AVX3 only
3122 }
3123 break;
3124 case Op_CompareAndSwapL:
3125 case Op_CompareAndSwapP:
3126 break;
3127 case Op_StrIndexOf:
3128 if (!UseSSE42Intrinsics) {
3129 return false;
3130 }
3131 break;
3132 case Op_StrIndexOfChar:
3133 if (!UseSSE42Intrinsics) {
3134 return false;
3135 }
3136 break;
3137 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3139 return false;
3140 }
3141 break;
3142 case Op_MulVB:
3143 case Op_LShiftVB:
3144 case Op_RShiftVB:
3145 case Op_URShiftVB:
3146 case Op_VectorInsert:
3147 case Op_VectorLoadMask:
3148 case Op_VectorStoreMask:
3149 case Op_VectorBlend:
3150 if (UseSSE < 4) {
3151 return false;
3152 }
3153 break;
3154 case Op_MaxD:
3155 case Op_MaxF:
3156 case Op_MinD:
3157 case Op_MinF:
3158 if (UseAVX < 1) { // enabled for AVX only
3159 return false;
3160 }
3161 break;
3162 case Op_CacheWB:
3163 case Op_CacheWBPreSync:
3164 case Op_CacheWBPostSync:
3165 if (!VM_Version::supports_data_cache_line_flush()) {
3166 return false;
3167 }
3168 break;
3169 case Op_ExtractB:
3170 case Op_ExtractL:
3171 case Op_ExtractI:
3172 case Op_RoundDoubleMode:
3173 if (UseSSE < 4) {
3174 return false;
3175 }
3176 break;
3177 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3179 return false; // 128bit vroundpd is not available
3180 }
3181 break;
3182 case Op_LoadVectorGather:
3183 case Op_LoadVectorGatherMasked:
3184 if (UseAVX < 2) {
3185 return false;
3186 }
3187 break;
3188 case Op_FmaF:
3189 case Op_FmaD:
3190 case Op_FmaVD:
3191 case Op_FmaVF:
3192 if (!UseFMA) {
3193 return false;
3194 }
3195 break;
3196 case Op_MacroLogicV:
3197 if (UseAVX < 3 || !UseVectorMacroLogic) {
3198 return false;
3199 }
3200 break;
3201
3202 case Op_VectorCmpMasked:
3203 case Op_VectorMaskGen:
3204 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3205 return false;
3206 }
3207 break;
3208 case Op_VectorMaskFirstTrue:
3209 case Op_VectorMaskLastTrue:
3210 case Op_VectorMaskTrueCount:
3211 case Op_VectorMaskToLong:
3212 if (UseAVX < 1) {
3213 return false;
3214 }
3215 break;
3216 case Op_RoundF:
3217 case Op_RoundD:
3218 break;
3219 case Op_CopySignD:
3220 case Op_CopySignF:
3221 if (UseAVX < 3) {
3222 return false;
3223 }
3224 if (!VM_Version::supports_avx512vl()) {
3225 return false;
3226 }
3227 break;
3228 case Op_CompressBits:
3229 case Op_ExpandBits:
3230 if (!VM_Version::supports_bmi2()) {
3231 return false;
3232 }
3233 break;
3234 case Op_CompressM:
3235 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3236 return false;
3237 }
3238 break;
3239 case Op_ConvF2HF:
3240 case Op_ConvHF2F:
3241 if (!VM_Version::supports_float16()) {
3242 return false;
3243 }
3244 break;
3245 case Op_VectorCastF2HF:
3246 case Op_VectorCastHF2F:
3247 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3248 return false;
3249 }
3250 break;
3251 }
3252 return true; // Match rules are supported by default.
3253 }
3254
3255 //------------------------------------------------------------------------
3256
3257 static inline bool is_pop_count_instr_target(BasicType bt) {
3258 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3259 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3260 }
3261
3262 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3263 return match_rule_supported_vector(opcode, vlen, bt);
3264 }
3265
3266 // Identify extra cases that we might want to provide match rules for vector nodes and
3267 // other intrinsics guarded with vector length (vlen) and element type (bt).
3268 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3269 if (!match_rule_supported(opcode)) {
3270 return false;
3271 }
3272 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3273 // * SSE2 supports 128bit vectors for all types;
3274 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3275 // * AVX2 supports 256bit vectors for all types;
3276 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3277 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3278 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3279 // And MaxVectorSize is taken into account as well.
3280 if (!vector_size_supported(bt, vlen)) {
3281 return false;
3282 }
3283 // Special cases which require vector length follow:
3284 // * implementation limitations
3285 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3286 // * 128bit vroundpd instruction is present only in AVX1
3287 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
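  // e.g. vlen == 8 elements of T_FLOAT: 8 * 4 * 8 == 256 bits (one YMM register).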
3288 switch (opcode) {
3289 case Op_MaxVHF:
3290 case Op_MinVHF:
3291 if (!VM_Version::supports_avx512bw()) {
3292 return false;
      } // fallthrough
3294 case Op_AddVHF:
3295 case Op_DivVHF:
3296 case Op_FmaVHF:
3297 case Op_MulVHF:
3298 case Op_SubVHF:
3299 case Op_SqrtVHF:
3300 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3301 return false;
3302 }
3303 if (!VM_Version::supports_avx512_fp16()) {
3304 return false;
3305 }
3306 break;
3307 case Op_AbsVF:
3308 case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3310 return false; // 512bit vandps and vxorps are not available
3311 }
3312 break;
3313 case Op_AbsVD:
3314 case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3316 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3317 }
3318 break;
3319 case Op_RotateRightV:
3320 case Op_RotateLeftV:
3321 if (bt != T_INT && bt != T_LONG) {
3322 return false;
3323 } // fallthrough
3324 case Op_MacroLogicV:
3325 if (!VM_Version::supports_evex() ||
3326 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3327 return false;
3328 }
3329 break;
3330 case Op_ClearArray:
3331 case Op_VectorMaskGen:
3332 case Op_VectorCmpMasked:
3333 if (!VM_Version::supports_avx512bw()) {
3334 return false;
3335 }
3336 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3337 return false;
3338 }
3339 break;
3340 case Op_LoadVectorMasked:
3341 case Op_StoreVectorMasked:
3342 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3343 return false;
3344 }
3345 break;
3346 case Op_UMinV:
3347 case Op_UMaxV:
3348 if (UseAVX == 0) {
3349 return false;
3350 }
3351 break;
3352 case Op_UMinReductionV:
3353 case Op_UMaxReductionV:
3354 if (UseAVX == 0) {
3355 return false;
3356 }
3357 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3358 return false;
3359 }
3360 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3361 return false;
3362 }
3363 break;
3364 case Op_MaxV:
3365 case Op_MinV:
3366 if (UseSSE < 4 && is_integral_type(bt)) {
3367 return false;
3368 }
3369 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3370 // Float/Double intrinsics are enabled for AVX family currently.
3371 if (UseAVX == 0) {
3372 return false;
3373 }
3374 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3375 return false;
3376 }
3377 }
3378 break;
3379 case Op_CallLeafVector:
3380 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3381 return false;
3382 }
3383 break;
3384 case Op_AddReductionVI:
3385 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3386 return false;
3387 }
3388 // fallthrough
3389 case Op_AndReductionV:
3390 case Op_OrReductionV:
3391 case Op_XorReductionV:
3392 if (is_subword_type(bt) && (UseSSE < 4)) {
3393 return false;
3394 }
3395 break;
3396 case Op_MinReductionV:
3397 case Op_MaxReductionV:
3398 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3399 return false;
3400 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3401 return false;
3402 }
3403 // Float/Double intrinsics enabled for AVX family.
3404 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3405 return false;
3406 }
3407 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3408 return false;
3409 }
3410 break;
3411 case Op_VectorBlend:
3412 if (UseAVX == 0 && size_in_bits < 128) {
3413 return false;
3414 }
3415 break;
3416 case Op_VectorTest:
3417 if (UseSSE < 4) {
3418 return false; // Implementation limitation
3419 } else if (size_in_bits < 32) {
3420 return false; // Implementation limitation
3421 }
3422 break;
3423 case Op_VectorLoadShuffle:
3424 case Op_VectorRearrange:
      if (vlen == 2) {
3426 return false; // Implementation limitation due to how shuffle is loaded
3427 } else if (size_in_bits == 256 && UseAVX < 2) {
3428 return false; // Implementation limitation
3429 }
3430 break;
3431 case Op_VectorLoadMask:
3432 case Op_VectorMaskCast:
3433 if (size_in_bits == 256 && UseAVX < 2) {
3434 return false; // Implementation limitation
3435 }
3436 // fallthrough
3437 case Op_VectorStoreMask:
3438 if (vlen == 2) {
3439 return false; // Implementation limitation
3440 }
3441 break;
3442 case Op_PopulateIndex:
3443 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3444 return false;
3445 }
3446 break;
3447 case Op_VectorCastB2X:
3448 case Op_VectorCastS2X:
3449 case Op_VectorCastI2X:
3450 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3451 return false;
3452 }
3453 break;
3454 case Op_VectorCastL2X:
3455 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3456 return false;
3457 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3458 return false;
3459 }
3460 break;
3461 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3465 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3466 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3467 return false;
3468 }
3469 }
3470 // fallthrough
3471 case Op_VectorCastD2X:
3472 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3473 return false;
3474 }
3475 break;
3476 case Op_VectorCastF2HF:
3477 case Op_VectorCastHF2F:
3478 if (!VM_Version::supports_f16c() &&
3479 ((!VM_Version::supports_evex() ||
3480 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3481 return false;
3482 }
3483 break;
3484 case Op_RoundVD:
3485 if (!VM_Version::supports_avx512dq()) {
3486 return false;
3487 }
3488 break;
3489 case Op_MulReductionVI:
3490 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3491 return false;
3492 }
3493 break;
3494 case Op_LoadVectorGatherMasked:
3495 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3496 return false;
3497 }
3498 if (is_subword_type(bt) &&
3499 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3500 (size_in_bits < 64) ||
3501 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3502 return false;
3503 }
3504 break;
3505 case Op_StoreVectorScatterMasked:
3506 case Op_StoreVectorScatter:
3507 if (is_subword_type(bt)) {
3508 return false;
3509 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3510 return false;
3511 }
3512 // fallthrough
3513 case Op_LoadVectorGather:
3514 if (!is_subword_type(bt) && size_in_bits == 64) {
3515 return false;
3516 }
3517 if (is_subword_type(bt) && size_in_bits < 64) {
3518 return false;
3519 }
3520 break;
3521 case Op_SaturatingAddV:
3522 case Op_SaturatingSubV:
3523 if (UseAVX < 1) {
3524 return false; // Implementation limitation
3525 }
3526 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3527 return false;
3528 }
3529 break;
3530 case Op_SelectFromTwoVector:
3531 if (size_in_bits < 128) {
3532 return false;
3533 }
3534 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3535 return false;
3536 }
3537 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3538 return false;
3539 }
3540 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3541 return false;
3542 }
3543 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3544 return false;
3545 }
3546 break;
3547 case Op_MaskAll:
3548 if (!VM_Version::supports_evex()) {
3549 return false;
3550 }
3551 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3552 return false;
3553 }
3554 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3555 return false;
3556 }
3557 break;
3558 case Op_VectorMaskCmp:
3559 if (vlen < 2 || size_in_bits < 32) {
3560 return false;
3561 }
3562 break;
3563 case Op_CompressM:
3564 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3565 return false;
3566 }
3567 break;
3568 case Op_CompressV:
3569 case Op_ExpandV:
3570 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3571 return false;
3572 }
      if (size_in_bits < 128) {
3574 return false;
      } // fallthrough
3576 case Op_VectorLongToMask:
3577 if (UseAVX < 1) {
3578 return false;
3579 }
3580 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3581 return false;
3582 }
3583 break;
3584 case Op_SignumVD:
3585 case Op_SignumVF:
3586 if (UseAVX < 1) {
3587 return false;
3588 }
3589 break;
3590 case Op_PopCountVI:
3591 case Op_PopCountVL: {
3592 if (!is_pop_count_instr_target(bt) &&
3593 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3594 return false;
3595 }
3596 }
3597 break;
3598 case Op_ReverseV:
3599 case Op_ReverseBytesV:
3600 if (UseAVX < 2) {
3601 return false;
3602 }
3603 break;
3604 case Op_CountTrailingZerosV:
3605 case Op_CountLeadingZerosV:
3606 if (UseAVX < 2) {
3607 return false;
3608 }
3609 break;
3610 }
  return true; // Match rules are supported by default.
3612 }
3613
3614 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine does a strict check on the existence
  // of masked operation patterns by returning false for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
3621 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3622 return false;
3623 }
3624
3625 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3626 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3627 return false;
3628 }
  switch (opcode) {
3630 // Unary masked operations
3631 case Op_AbsVB:
3632 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      } // fallthrough
3636 case Op_AbsVI:
3637 case Op_AbsVL:
3638 return true;
3639
3640 // Ternary masked operations
3641 case Op_FmaVF:
3642 case Op_FmaVD:
3643 return true;
3644
3645 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3647 return false;
3648 }
3649 return true;
3650
3651 // Binary masked operations
3652 case Op_AddVB:
3653 case Op_AddVS:
3654 case Op_SubVB:
3655 case Op_SubVS:
3656 case Op_MulVS:
3657 case Op_LShiftVS:
3658 case Op_RShiftVS:
3659 case Op_URShiftVS:
3660 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3661 if (!VM_Version::supports_avx512bw()) {
3662 return false; // Implementation limitation
3663 }
3664 return true;
3665
3666 case Op_MulVL:
3667 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3668 if (!VM_Version::supports_avx512dq()) {
3669 return false; // Implementation limitation
3670 }
3671 return true;
3672
3673 case Op_AndV:
3674 case Op_OrV:
3675 case Op_XorV:
3676 case Op_RotateRightV:
3677 case Op_RotateLeftV:
3678 if (bt != T_INT && bt != T_LONG) {
3679 return false; // Implementation limitation
3680 }
3681 return true;
3682
3683 case Op_VectorLoadMask:
3684 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3685 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3686 return false;
3687 }
3688 return true;
3689
3690 case Op_AddVI:
3691 case Op_AddVL:
3692 case Op_AddVF:
3693 case Op_AddVD:
3694 case Op_SubVI:
3695 case Op_SubVL:
3696 case Op_SubVF:
3697 case Op_SubVD:
3698 case Op_MulVI:
3699 case Op_MulVF:
3700 case Op_MulVD:
3701 case Op_DivVF:
3702 case Op_DivVD:
3703 case Op_SqrtVF:
3704 case Op_SqrtVD:
3705 case Op_LShiftVI:
3706 case Op_LShiftVL:
3707 case Op_RShiftVI:
3708 case Op_RShiftVL:
3709 case Op_URShiftVI:
3710 case Op_URShiftVL:
3711 case Op_LoadVectorMasked:
3712 case Op_StoreVectorMasked:
3713 case Op_LoadVectorGatherMasked:
3714 case Op_StoreVectorScatterMasked:
3715 return true;
3716
3717 case Op_UMinV:
3718 case Op_UMaxV:
3719 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3720 return false;
3721 } // fallthrough
3722 case Op_MaxV:
3723 case Op_MinV:
3724 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3725 return false; // Implementation limitation
3726 }
3727 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3728 return false; // Implementation limitation
3729 }
3730 return true;
3731 case Op_SaturatingAddV:
3732 case Op_SaturatingSubV:
3733 if (!is_subword_type(bt)) {
3734 return false;
3735 }
3736 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3737 return false; // Implementation limitation
3738 }
3739 return true;
3740
3741 case Op_VectorMaskCmp:
3742 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3743 return false; // Implementation limitation
3744 }
3745 return true;
3746
3747 case Op_VectorRearrange:
3748 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3749 return false; // Implementation limitation
3750 }
3751 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3752 return false; // Implementation limitation
3753 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3754 return false; // Implementation limitation
3755 }
3756 return true;
3757
3758 // Binary Logical operations
3759 case Op_AndVMask:
3760 case Op_OrVMask:
3761 case Op_XorVMask:
3762 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3763 return false; // Implementation limitation
3764 }
3765 return true;
3766
3767 case Op_PopCountVI:
3768 case Op_PopCountVL:
3769 if (!is_pop_count_instr_target(bt)) {
3770 return false;
3771 }
3772 return true;
3773
3774 case Op_MaskAll:
3775 return true;
3776
3777 case Op_CountLeadingZerosV:
3778 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3779 return true;
      } // fallthrough
3781 default:
3782 return false;
3783 }
3784 }
3785
3786 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3787 return false;
3788 }
3789
3790 // Return true if Vector::rearrange needs preparation of the shuffle argument
3791 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3792 switch (elem_bt) {
3793 case T_BYTE: return false;
3794 case T_SHORT: return !VM_Version::supports_avx512bw();
3795 case T_INT: return !VM_Version::supports_avx();
3796 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3797 default:
3798 ShouldNotReachHere();
3799 return false;
3800 }
3801 }
3802
3803 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3804 // Prefer predicate if the mask type is "TypeVectMask".
3805 return vt->isa_vectmask() != nullptr;
3806 }
3807
3808 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3809 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3810 bool legacy = (generic_opnd->opcode() == LEGVEC);
3811 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3812 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3813 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3814 return new legVecZOper();
3815 }
3816 if (legacy) {
3817 switch (ideal_reg) {
3818 case Op_VecS: return new legVecSOper();
3819 case Op_VecD: return new legVecDOper();
3820 case Op_VecX: return new legVecXOper();
3821 case Op_VecY: return new legVecYOper();
3822 case Op_VecZ: return new legVecZOper();
3823 }
3824 } else {
3825 switch (ideal_reg) {
3826 case Op_VecS: return new vecSOper();
3827 case Op_VecD: return new vecDOper();
3828 case Op_VecX: return new vecXOper();
3829 case Op_VecY: return new vecYOper();
3830 case Op_VecZ: return new vecZOper();
3831 }
3832 }
3833 ShouldNotReachHere();
3834 return nullptr;
3835 }
3836
3837 bool Matcher::is_reg2reg_move(MachNode* m) {
3838 switch (m->rule()) {
3839 case MoveVec2Leg_rule:
3840 case MoveLeg2Vec_rule:
3841 case MoveF2VL_rule:
3842 case MoveF2LEG_rule:
3843 case MoveVL2F_rule:
3844 case MoveLEG2F_rule:
3845 case MoveD2VL_rule:
3846 case MoveD2LEG_rule:
3847 case MoveVL2D_rule:
3848 case MoveLEG2D_rule:
3849 return true;
3850 default:
3851 return false;
3852 }
3853 }
3854
3855 bool Matcher::is_generic_vector(MachOper* opnd) {
3856 switch (opnd->opcode()) {
3857 case VEC:
3858 case LEGVEC:
3859 return true;
3860 default:
3861 return false;
3862 }
3863 }
3864
3865 //------------------------------------------------------------------------
3866
3867 const RegMask* Matcher::predicate_reg_mask(void) {
3868 return &_VECTMASK_REG_mask;
3869 }
3870
3871 // Max vector size in bytes. 0 if not supported.
3872 int Matcher::vector_width_in_bytes(BasicType bt) {
3873 assert(is_java_primitive(bt), "only primitive type vectors");
3874 // SSE2 supports 128bit vectors for all types.
3875 // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX512) supports 512bit vectors for all types.
3877 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3878 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3879 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3880 size = (UseAVX > 2) ? 64 : 32;
3881 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3882 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3883 // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
3885 // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
3910 return size;
3911 }
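// Worked example: UseAVX == 2 gives a base width of (1 << 2) * 8 == 32 bytes
// for every type; UseAVX == 3 with AVX512BW lets even the subword types reach
// 64 bytes, and MaxVectorSize can only shrink the result further.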
3912
3913 // Limits on vector size (number of elements) loaded into vector.
3914 int Matcher::max_vector_size(const BasicType bt) {
3915 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3916 }
3917 int Matcher::min_vector_size(const BasicType bt) {
3918 int max_size = max_vector_size(bt);
  // The minimum size which can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Allow length-1 double vectors to support calls into SVML double64 routines.
3922 if (bt == T_DOUBLE) {
3923 size = 1;
3924 }
  return MIN2(size, max_size);
3926 }
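// Example (assuming max_vector_size(bt) does not cap it further):
// min_vector_size(T_BYTE) == 4, min_vector_size(T_INT) == 2, and
// min_vector_size(T_DOUBLE) == 1, keeping the SVML double64 stubs callable.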
3927
3928 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3929 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3930 // by default on Cascade Lake
3931 if (VM_Version::is_default_intel_cascade_lake()) {
3932 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3933 }
3934 return Matcher::max_vector_size(bt);
3935 }
3936
3937 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3938 return -1;
3939 }
3940
3941 // Vector ideal reg corresponding to specified size in bytes
3942 uint Matcher::vector_ideal_reg(int size) {
3943 assert(MaxVectorSize >= size, "");
  switch (size) {
3945 case 4: return Op_VecS;
3946 case 8: return Op_VecD;
3947 case 16: return Op_VecX;
3948 case 32: return Op_VecY;
3949 case 64: return Op_VecZ;
3950 }
3951 ShouldNotReachHere();
3952 return 0;
3953 }
3954
3955 // Check for shift by small constant as well
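// An address shape this helps fold (an illustrative LP64 sketch): a[i]
// computed as
//   AddP(base, AddP(base, LShiftL(ConvI2L(i), 2), #16))
// lets the shift by a small constant (<= 3) become the scale of a
// [base + i*4 + 16] addressing mode instead of occupying a register.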
3956 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3957 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3958 shift->in(2)->get_int() <= 3 &&
3959 // Are there other uses besides address expressions?
3960 !matcher->is_visited(shift)) {
3961 address_visited.set(shift->_idx); // Flag as address_visited
3962 mstack.push(shift->in(2), Matcher::Visit);
3963 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
3967 if (conv->Opcode() == Op_ConvI2L &&
3968 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3969 // Are there other uses besides address expressions?
3970 !matcher->is_visited(conv)) {
3971 address_visited.set(conv->_idx); // Flag as address_visited
3972 mstack.push(conv->in(1), Matcher::Pre_Visit);
3973 } else {
3974 mstack.push(conv, Matcher::Pre_Visit);
3975 }
3976 return true;
3977 }
3978 return false;
3979 }
3980
3981 // This function identifies sub-graphs in which a 'load' node is
3982 // input to two different nodes, and such that it can be matched
3983 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3985 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3986 // refers to the same node.
3987 //
3988 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3989 // This is a temporary solution until we make DAGs expressible in ADL.
3990 template<typename ConType>
3991 class FusedPatternMatcher {
3992 Node* _op1_node;
3993 Node* _mop_node;
3994 int _con_op;
3995
3996 static int match_next(Node* n, int next_op, int next_op_idx) {
3997 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3998 return -1;
3999 }
4000
4001 if (next_op_idx == -1) { // n is commutative, try rotations
4002 if (n->in(1)->Opcode() == next_op) {
4003 return 1;
4004 } else if (n->in(2)->Opcode() == next_op) {
4005 return 2;
4006 }
4007 } else {
4008 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
4009 if (n->in(next_op_idx)->Opcode() == next_op) {
4010 return next_op_idx;
4011 }
4012 }
4013 return -1;
4014 }
4015
4016 public:
4017 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4018 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4019
4020 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4021 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4022 typename ConType::NativeType con_value) {
4023 if (_op1_node->Opcode() != op1) {
4024 return false;
4025 }
4026 if (_mop_node->outcnt() > 2) {
4027 return false;
4028 }
4029 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4030 if (op1_op2_idx == -1) {
4031 return false;
4032 }
4033 // Memory operation must be the other edge
4034 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4035
4036 // Check that the mop node is really what we want
4037 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4038 Node* op2_node = _op1_node->in(op1_op2_idx);
4039 if (op2_node->outcnt() > 1) {
4040 return false;
4041 }
4042 assert(op2_node->Opcode() == op2, "Should be");
4043 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4044 if (op2_con_idx == -1) {
4045 return false;
4046 }
4047 // Memory operation must be the other edge
4048 int op2_mop_idx = (op2_con_idx & 1) + 1;
4049 // Check that the memory operation is the same node
4050 if (op2_node->in(op2_mop_idx) == _mop_node) {
4051 // Now check the constant
4052 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4053 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4054 return true;
4055 }
4056 }
4057 }
4058 return false;
4059 }
4060 };
4061
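// An informal reading of the shapes matched below (x is the loaded value):
//   blsi  : x & (0 - x)     -- isolate lowest set bit
//   blsr  : x & (x + (-1))  -- clear lowest set bit
//   blsmsk: x ^ (x + (-1))  -- mask up to and including lowest set bit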
4062 static bool is_bmi_pattern(Node* n, Node* m) {
4063 assert(UseBMI1Instructions, "sanity");
4064 if (n != nullptr && m != nullptr) {
4065 if (m->Opcode() == Op_LoadI) {
4066 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4067 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4068 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4069 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4070 } else if (m->Opcode() == Op_LoadL) {
4071 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4072 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4073 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4074 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4075 }
4076 }
4077 return false;
4078 }
4079
4080 // Should the matcher clone input 'm' of node 'n'?
4081 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4082 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4083 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4084 mstack.push(m, Visit);
4085 return true;
4086 }
4087 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4088 mstack.push(m, Visit); // m = ShiftCntV
4089 return true;
4090 }
4091 if (is_encode_and_store_pattern(n, m)) {
4092 mstack.push(m, Visit);
4093 return true;
4094 }
4095 return false;
4096 }
4097
4098 // Should the Matcher clone shifts on addressing modes, expecting them
4099 // to be subsumed into complex addressing expressions or compute them
4100 // into registers?
4101 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4102 Node *off = m->in(AddPNode::Offset);
4103 if (off->is_Con()) {
4104 address_visited.test_set(m->_idx); // Flag as address_visited
4105 Node *adr = m->in(AddPNode::Address);
4106
4107 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4108 // AtomicAdd is not an addressing expression.
4109 // Cheap to find it by looking for screwy base.
4110 if (adr->is_AddP() &&
4111 !adr->in(AddPNode::Base)->is_top() &&
4112 !adr->in(AddPNode::Offset)->is_Con() &&
4113 off->get_long() == (int) (off->get_long()) && // immL32
4114 // Are there other uses besides address expressions?
4115 !is_visited(adr)) {
4116 address_visited.set(adr->_idx); // Flag as address_visited
4117 Node *shift = adr->in(AddPNode::Offset);
4118 if (!clone_shift(shift, this, mstack, address_visited)) {
4119 mstack.push(shift, Pre_Visit);
4120 }
4121 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4122 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4123 } else {
4124 mstack.push(adr, Pre_Visit);
4125 }
4126
4127 // Clone X+offset as it also folds into most addressing expressions
4128 mstack.push(off, Visit);
4129 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4130 return true;
4131 } else if (clone_shift(off, this, mstack, address_visited)) {
4132 address_visited.test_set(m->_idx); // Flag as address_visited
4133 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4134 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4135 return true;
4136 }
4137 return false;
4138 }
4139
4140 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4141 switch (bt) {
4142 case BoolTest::eq:
4143 return Assembler::eq;
4144 case BoolTest::ne:
4145 return Assembler::neq;
4146 case BoolTest::le:
4147 case BoolTest::ule:
4148 return Assembler::le;
4149 case BoolTest::ge:
4150 case BoolTest::uge:
4151 return Assembler::nlt;
4152 case BoolTest::lt:
4153 case BoolTest::ult:
4154 return Assembler::lt;
4155 case BoolTest::gt:
4156 case BoolTest::ugt:
4157 return Assembler::nle;
4158 default : ShouldNotReachHere(); return Assembler::_false;
4159 }
4160 }
4161
4162 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4163 switch (bt) {
4164 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4165 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4166 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4167 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4168 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4169 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4170 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4171 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4172 }
4173 }
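// Note: the _OQ (ordered, quiet) predicates above yield false when either
// operand is NaN, matching Java semantics where every comparison with NaN is
// false except !=, which therefore uses the unordered NEQ_UQ predicate.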
4174
4175 // Helper methods for MachSpillCopyNode::implementation().
4176 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4177 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4178 assert(ireg == Op_VecS || // 32bit vector
4179 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4180 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4181 "no non-adjacent vector moves" );
4182 if (masm) {
4183 switch (ireg) {
4184 case Op_VecS: // copy whole register
4185 case Op_VecD:
4186 case Op_VecX:
4187 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4188 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4189 } else {
4190 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4191 }
4192 break;
4193 case Op_VecY:
4194 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4195 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4196 } else {
4197 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4198 }
4199 break;
4200 case Op_VecZ:
4201 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4202 break;
4203 default:
4204 ShouldNotReachHere();
4205 }
4206 #ifndef PRODUCT
4207 } else {
4208 switch (ireg) {
4209 case Op_VecS:
4210 case Op_VecD:
4211 case Op_VecX:
4212 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4213 break;
4214 case Op_VecY:
4215 case Op_VecZ:
4216 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4217 break;
4218 default:
4219 ShouldNotReachHere();
4220 }
4221 #endif
4222 }
4223 }
4224
4225 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4226 int stack_offset, int reg, uint ireg, outputStream* st) {
4227 if (masm) {
4228 if (is_load) {
4229 switch (ireg) {
4230 case Op_VecS:
4231 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4232 break;
4233 case Op_VecD:
4234 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4235 break;
4236 case Op_VecX:
4237 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4238 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4239 } else {
4240 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4242 }
4243 break;
4244 case Op_VecY:
4245 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4246 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4247 } else {
4248 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4250 }
4251 break;
4252 case Op_VecZ:
4253 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4254 break;
4255 default:
4256 ShouldNotReachHere();
4257 }
4258 } else { // store
4259 switch (ireg) {
4260 case Op_VecS:
4261 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4262 break;
4263 case Op_VecD:
4264 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4265 break;
4266 case Op_VecX:
4267 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4268 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4271 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4272 }
4273 break;
4274 case Op_VecY:
4275 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4276 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4279 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4280 }
4281 break;
4282 case Op_VecZ:
4283 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 }
4289 #ifndef PRODUCT
4290 } else {
4291 if (is_load) {
4292 switch (ireg) {
4293 case Op_VecS:
4294 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4295 break;
4296 case Op_VecD:
4297 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4298 break;
4299 case Op_VecX:
4300 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4301 break;
4302 case Op_VecY:
4303 case Op_VecZ:
4304 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4305 break;
4306 default:
4307 ShouldNotReachHere();
4308 }
4309 } else { // store
4310 switch (ireg) {
4311 case Op_VecS:
4312 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4313 break;
4314 case Op_VecD:
4315 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4316 break;
4317 case Op_VecX:
4318 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4319 break;
4320 case Op_VecY:
4321 case Op_VecZ:
4322 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4323 break;
4324 default:
4325 ShouldNotReachHere();
4326 }
4327 }
4328 #endif
4329 }
4330 }
4331
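// Replicate an immediate into a little-endian byte array, e.g. (a sketch)
// vreplicate_imm(T_INT, 0x01020304, 4) yields the 16-byte sequence
// 04 03 02 01 repeated four times, ready for the constant table.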
4332 template <class T>
4333 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4334 int size = type2aelembytes(bt) * len;
4335 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4336 for (int i = 0; i < len; i++) {
4337 int offset = i * type2aelembytes(bt);
4338 switch (bt) {
4339 case T_BYTE: val->at(i) = con; break;
4340 case T_SHORT: {
4341 jshort c = con;
4342 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4343 break;
4344 }
4345 case T_INT: {
4346 jint c = con;
4347 memcpy(val->adr_at(offset), &c, sizeof(jint));
4348 break;
4349 }
4350 case T_LONG: {
4351 jlong c = con;
4352 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4353 break;
4354 }
4355 case T_FLOAT: {
4356 jfloat c = con;
4357 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4358 break;
4359 }
4360 case T_DOUBLE: {
4361 jdouble c = con;
4362 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4363 break;
4364 }
4365 default: assert(false, "%s", type2name(bt));
4366 }
4367 }
4368 return val;
4369 }
4370
4371 static inline jlong high_bit_set(BasicType bt) {
4372 switch (bt) {
4373 case T_BYTE: return 0x8080808080808080;
4374 case T_SHORT: return 0x8000800080008000;
4375 case T_INT: return 0x8000000080000000;
4376 case T_LONG: return 0x8000000000000000;
4377 default:
4378 ShouldNotReachHere();
4379 return 0;
4380 }
4381 }
4382
4383 #ifndef PRODUCT
4384 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4385 st->print("nop \t# %d bytes pad for loops and calls", _count);
4386 }
4387 #endif
4388
4389 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4390 __ nop(_count);
4391 }
4392
4393 uint MachNopNode::size(PhaseRegAlloc*) const {
4394 return _count;
4395 }
4396
4397 #ifndef PRODUCT
4398 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4399 st->print("# breakpoint");
4400 }
4401 #endif
4402
4403 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4404 __ int3();
4405 }
4406
4407 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4408 return MachNode::size(ra_);
4409 }
4410
4411 %}
4412
4413 //----------ENCODING BLOCK-----------------------------------------------------
4414 // This block specifies the encoding classes used by the compiler to
4415 // output byte streams. Encoding classes are parameterized macros
4416 // used by Machine Instruction Nodes in order to generate the bit
4417 // encoding of the instruction. Operands specify their base encoding
4418 // interface with the interface keyword. There are currently
4419 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
4420 // COND_INTER. REG_INTER causes an operand to generate a function
4421 // which returns its register number when queried. CONST_INTER causes
4422 // an operand to generate a function which returns the value of the
4423 // constant when queried. MEMORY_INTER causes an operand to generate
4424 // four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried;
// a function is also available to check whether a constant
// displacement is an oop.
4426 // COND_INTER causes an operand to generate six functions which return
4427 // the encoding code (ie - encoding bits for the instruction)
4428 // associated with each basic boolean condition for a conditional
4429 // instruction.
4430 //
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding
4434 // classes (which must be a sequence of enc_class names, and their
4435 // parameters, specified in the encoding block), and they use the
4436 // opcode keyword to specify, in order, their primary, secondary, and
4437 // tertiary opcode. Only the opcode sections which a particular
4438 // instruction needs for encoding need to be specified.
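// For example, the integer-divide instructions later in this file can select
// the cdql_enc class defined below with ins_encode(cdql_enc(div)).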
4439 encode %{
4440 enc_class cdql_enc(no_rax_rdx_RegI div)
4441 %{
4442 // Full implementation of Java idiv and irem; checks for
4443 // special case as described in JVM spec., p.243 & p.271.
4444 //
4445 // normal case special case
4446 //
4447 // input : rax: dividend min_int
4448 // reg: divisor -1
4449 //
4450 // output: rax: quotient (= rax idiv reg) min_int
4451 // rdx: remainder (= rax irem reg) 0
4452 //
// Code sequence:
4454 //
4455 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4456 // 5: 75 07/08 jne e <normal>
4457 // 7: 33 d2 xor %edx,%edx
4458 // [div >= 8 -> offset + 1]
4459 // [REX_B]
4460 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4461 // c: 74 03/04 je 11 <done>
4462 // 000000000000000e <normal>:
4463 // e: 99 cltd
4464 // [div >= 8 -> offset + 1]
4465 // [REX_B]
4466 // f: f7 f9 idiv $div
4467 // 0000000000000011 <done>:
4468 Label normal;
4469 Label done;
4470
4471 // cmp $0x80000000,%eax
4472 __ cmpl(as_Register(RAX_enc), 0x80000000);
4473
4474 // jne e <normal>
4475 __ jccb(Assembler::notEqual, normal);
4476
4477 // xor %edx,%edx
4478 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4479
// cmp $0xffffffffffffffff,$div
4481 __ cmpl($div$$Register, -1);
4482
4483 // je 11 <done>
4484 __ jccb(Assembler::equal, done);
4485
4486 // <normal>
4487 // cltd
4488 __ bind(normal);
4489 __ cdql();
4490
4491 // idivl
4492 // <done>
4493 __ idivl($div$$Register);
4494 __ bind(done);
4495 %}
4496
4497 enc_class cdqq_enc(no_rax_rdx_RegL div)
4498 %{
4499 // Full implementation of Java ldiv and lrem; checks for
4500 // special case as described in JVM spec., p.243 & p.271.
4501 //
4502 // normal case special case
4503 //
4504 // input : rax: dividend min_long
4505 // reg: divisor -1
4506 //
4507 // output: rax: quotient (= rax idiv reg) min_long
4508 // rdx: remainder (= rax irem reg) 0
4509 //
// Code sequence:
4511 //
4512 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4513 // 7: 00 00 80
4514 // a: 48 39 d0 cmp %rdx,%rax
4515 // d: 75 08 jne 17 <normal>
4516 // f: 33 d2 xor %edx,%edx
4517 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4518 // 15: 74 05 je 1c <done>
4519 // 0000000000000017 <normal>:
4520 // 17: 48 99 cqto
4521 // 19: 48 f7 f9 idiv $div
4522 // 000000000000001c <done>:
4523 Label normal;
4524 Label done;
4525
4526 // mov $0x8000000000000000,%rdx
4527 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4528
4529 // cmp %rdx,%rax
4530 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4531
4532 // jne 17 <normal>
4533 __ jccb(Assembler::notEqual, normal);
4534
4535 // xor %edx,%edx
4536 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4537
4538 // cmp $0xffffffffffffffff,$div
4539 __ cmpq($div$$Register, -1);
4540
// je 1c <done>
4542 __ jccb(Assembler::equal, done);
4543
4544 // <normal>
4545 // cqto
4546 __ bind(normal);
4547 __ cdqq();
4548
// idivq
4550 // <done>
4551 __ idivq($div$$Register);
4552 __ bind(done);
4553 %}
4554
4555 enc_class clear_avx %{
4556 DEBUG_ONLY(int off0 = __ offset());
4557 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when current compiled code uses
// wide vectors to avoid AVX <-> SSE transition penalty during call.
4561 __ vzeroupper();
4562 }
4563 DEBUG_ONLY(int off1 = __ offset());
4564 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4565 %}
4566
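// Call into the runtime through a scratch register, so the target is not
// limited to a 32-bit call displacement.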
4567 enc_class Java_To_Runtime(method meth) %{
4568 __ lea(r10, RuntimeAddress((address)$meth$$method));
4569 __ call(r10);
4570 __ post_call_nop();
4571 %}
4572
4573 enc_class Java_Static_Call(method meth)
4574 %{
4575 // JAVA STATIC CALL
4576 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4577 // determine who we intended to call.
4578 if (!_method) {
4579 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4580 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4581 // The NOP here is purely to ensure that eliding a call to
4582 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4583 __ nop(5);
4584 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4585 } else {
4586 int method_index = resolved_method_index(masm);
4587 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4588 : static_call_Relocation::spec(method_index);
4589 address mark = __ pc();
4590 int call_offset = __ offset();
4591 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4592 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4593 // Calls of the same statically bound method can share
4594 // a stub to the interpreter.
4595 __ code()->shared_stub_to_interp_for(_method, call_offset);
4596 } else {
4597 // Emit stubs for static call.
4598 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4599 __ clear_inst_mark();
4600 if (stub == nullptr) {
4601 ciEnv::current()->record_failure("CodeCache is full");
4602 return;
4603 }
4604 }
4605 }
4606 __ post_call_nop();
4607 %}
4608
4609 enc_class Java_Dynamic_Call(method meth) %{
4610 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4611 __ post_call_nop();
4612 %}
4613
4614 enc_class call_epilog %{
4615 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find magic cookie on stack
4617 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4618 Label L;
4619 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4620 __ jccb(Assembler::equal, L);
4621 // Die if stack mismatch
4622 __ int3();
4623 __ bind(L);
4624 }
4625 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4626 // The last return value is not set by the callee but used to pass the null marker to compiled code.
// Search for the corresponding projection, get the register, and emit code that initializes it.
4628 uint con = (tf()->range_cc()->cnt() - 1);
4629 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4630 ProjNode* proj = fast_out(i)->as_Proj();
4631 if (proj->_con == con) {
4632 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4633 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4634 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4635 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4636 __ testq(rax, rax);
4637 __ setb(Assembler::notZero, toReg);
4638 __ movzbl(toReg, toReg);
4639 if (reg->is_stack()) {
4640 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4641 __ movq(Address(rsp, st_off), toReg);
4642 }
4643 break;
4644 }
4645 }
4646 if (return_value_is_used()) {
4647 // An inline type is returned as fields in multiple registers.
// rax either contains an oop (if the inline type is returned buffered) or a
// pointer to the corresponding InlineKlass with the lowest bit set to 1.
// Zero rax when the lowest bit is set so that C2 can use the oop after null checking.
4651 // rax &= (rax & 1) - 1
4652 __ movptr(rscratch1, rax);
4653 __ andptr(rscratch1, 0x1);
4654 __ subptr(rscratch1, 0x1);
4655 __ andptr(rax, rscratch1);
4656 }
4657 }
4658 %}
4659
4660 %}
4661
4662 //----------FRAME--------------------------------------------------------------
4663 // Definition of frame structure and management information.
4664 //
4665 // S T A C K L A Y O U T Allocators stack-slot number
4666 // | (to get allocators register number
4667 // G Owned by | | v add OptoReg::stack0())
4668 // r CALLER | |
4669 // o | +--------+ pad to even-align allocators stack-slot
4670 // w V | pad0 | numbers; owned by CALLER
4671 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4672 // h ^ | in | 5
4673 // | | args | 4 Holes in incoming args owned by SELF
4674 // | | | | 3
4675 // | | +--------+
4676 // V | | old out| Empty on Intel, window on Sparc
4677 // | old |preserve| Must be even aligned.
4678 // | SP-+--------+----> Matcher::_old_SP, even aligned
4679 // | | in | 3 area for Intel ret address
4680 // Owned by |preserve| Empty on Sparc.
4681 // SELF +--------+
4682 // | | pad2 | 2 pad to align old SP
4683 // | +--------+ 1
4684 // | | locks | 0
4685 // | +--------+----> OptoReg::stack0(), even aligned
4686 // | | pad1 | 11 pad to align new SP
4687 // | +--------+
4688 // | | | 10
4689 // | | spills | 9 spills
4690 // V | | 8 (pad0 slot for callee)
4691 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4692 // ^ | out | 7
4693 // | | args | 6 Holes in outgoing args owned by CALLEE
4694 // Owned by +--------+
4695 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4696 // | new |preserve| Must be even-aligned.
4697 // | SP-+--------+----> Matcher::_new_SP, even aligned
4698 // | | |
4699 //
4700 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4701 // known from SELF's arguments and the Java calling convention.
4702 // Region 6-7 is determined per call site.
4703 // Note 2: If the calling convention leaves holes in the incoming argument
4704 // area, those holes are owned by SELF. Holes in the outgoing area
4705 // are owned by the CALLEE. Holes should not be necessary in the
4706 // incoming area, as the Java calling convention is completely under
4707 // the control of the AD file. Doubles can be sorted and packed to
4708 // avoid holes. Holes in the outgoing arguments may be necessary for
4709 // varargs C calling conventions.
4710 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4711 // even aligned with pad0 as needed.
4712 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4713 // region 6-11 is even aligned; it may be padded out more so that
4714 // the region from SP to FP meets the minimum stack alignment.
4715 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4716 // alignment. Region 11, pad1, may be dynamically extended so that
4717 // SP meets the minimum alignment.
4718
4719 frame
4720 %{
4721 // These three registers define part of the calling convention
4722 // between compiled code and the interpreter.
4723 inline_cache_reg(RAX); // Inline Cache Register
4724
4725 // Optional: name the operand used by cisc-spilling to access
4726 // [stack_pointer + offset]
4727 cisc_spilling_operand_name(indOffset32);
4728
4729 // Number of stack slots consumed by locking an object
4730 sync_stack_slots(2);
4731
4732 // Compiled code's Frame Pointer
4733 frame_pointer(RSP);
4734
4735 // Stack alignment requirement
4736 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4737
4738 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4739 // for calls to C. Supports the var-args backing area for register parms.
4740 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4741
4742 // The after-PROLOG location of the return address. Location of
4743 // return address specifies a type (REG or STACK) and a number
4744 // representing the register number (i.e. - use a register name) or
4745 // stack slot.
4746 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4747 // Otherwise, it is above the locks and verification slot and alignment word
4748 return_addr(STACK - 2 +
4749 align_up((Compile::current()->in_preserve_stack_slots() +
4750 Compile::current()->fixed_slots()),
4751 stack_alignment_in_slots()));
4752
4753 // Location of compiled Java return values. Same as C for now.
4754 return_value
4755 %{
4756 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4757 "only return normal values");
4758
4759 static const int lo[Op_RegL + 1] = {
4760 0,
4761 0,
4762 RAX_num, // Op_RegN
4763 RAX_num, // Op_RegI
4764 RAX_num, // Op_RegP
4765 XMM0_num, // Op_RegF
4766 XMM0_num, // Op_RegD
4767 RAX_num // Op_RegL
4768 };
4769 static const int hi[Op_RegL + 1] = {
4770 0,
4771 0,
4772 OptoReg::Bad, // Op_RegN
4773 OptoReg::Bad, // Op_RegI
4774 RAX_H_num, // Op_RegP
4775 OptoReg::Bad, // Op_RegF
4776 XMM0b_num, // Op_RegD
4777 RAX_H_num // Op_RegL
4778 };
// The arrays above exclude flags and vector registers.
4780 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4781 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4782 %}
4783 %}
4784
4785 //----------ATTRIBUTES---------------------------------------------------------
4786 //----------Operand Attributes-------------------------------------------------
4787 op_attrib op_cost(0); // Required cost attribute
4788
4789 //----------Instruction Attributes---------------------------------------------
4790 ins_attrib ins_cost(100); // Required cost attribute
4791 ins_attrib ins_size(8); // Required size attribute (in bits)
4792 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4793 // a non-matching short branch variant
4794 // of some long branch?
4795 ins_attrib ins_alignment(1); // Required alignment attribute (must
4796 // be a power of 2) specifies the
4797 // alignment that some part of the
4798 // instruction (not necessarily the
4799 // start) requires. If > 1, a
4800 // compute_padding() function must be
4801 // provided for the instruction
4802
4803 // Whether this node is expanded during code emission into a sequence of
4804 // instructions and the first instruction can perform an implicit null check.
4805 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4806
4807 //----------OPERANDS-----------------------------------------------------------
4808 // Operand definitions must precede instruction definitions for correct parsing
4809 // in the ADLC because operands constitute user defined types which are used in
4810 // instruction definitions.
4811
4812 //----------Simple Operands----------------------------------------------------
4813 // Immediate Operands
4814 // Integer Immediate
4815 operand immI()
4816 %{
4817 match(ConI);
4818
4819 op_cost(10);
4820 format %{ %}
4821 interface(CONST_INTER);
4822 %}
4823
4824 // Constant for test vs zero
4825 operand immI_0()
4826 %{
4827 predicate(n->get_int() == 0);
4828 match(ConI);
4829
4830 op_cost(0);
4831 format %{ %}
4832 interface(CONST_INTER);
4833 %}
4834
4835 // Constant for increment
4836 operand immI_1()
4837 %{
4838 predicate(n->get_int() == 1);
4839 match(ConI);
4840
4841 op_cost(0);
4842 format %{ %}
4843 interface(CONST_INTER);
4844 %}
4845
4846 // Constant for decrement
4847 operand immI_M1()
4848 %{
4849 predicate(n->get_int() == -1);
4850 match(ConI);
4851
4852 op_cost(0);
4853 format %{ %}
4854 interface(CONST_INTER);
4855 %}
4856
4857 operand immI_2()
4858 %{
4859 predicate(n->get_int() == 2);
4860 match(ConI);
4861
4862 op_cost(0);
4863 format %{ %}
4864 interface(CONST_INTER);
4865 %}
4866
4867 operand immI_4()
4868 %{
4869 predicate(n->get_int() == 4);
4870 match(ConI);
4871
4872 op_cost(0);
4873 format %{ %}
4874 interface(CONST_INTER);
4875 %}
4876
4877 operand immI_8()
4878 %{
4879 predicate(n->get_int() == 8);
4880 match(ConI);
4881
4882 op_cost(0);
4883 format %{ %}
4884 interface(CONST_INTER);
4885 %}
4886
4887 // Valid scale values for addressing modes
4888 operand immI2()
4889 %{
4890 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4891 match(ConI);
4892
4893 format %{ %}
4894 interface(CONST_INTER);
4895 %}
4896
4897 operand immU7()
4898 %{
4899 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4900 match(ConI);
4901
4902 op_cost(5);
4903 format %{ %}
4904 interface(CONST_INTER);
4905 %}
4906
4907 operand immI8()
4908 %{
4909 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4910 match(ConI);
4911
4912 op_cost(5);
4913 format %{ %}
4914 interface(CONST_INTER);
4915 %}
4916
4917 operand immU8()
4918 %{
4919 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4920 match(ConI);
4921
4922 op_cost(5);
4923 format %{ %}
4924 interface(CONST_INTER);
4925 %}
4926
4927 operand immI16()
4928 %{
4929 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4930 match(ConI);
4931
4932 op_cost(10);
4933 format %{ %}
4934 interface(CONST_INTER);
4935 %}
4936
4937 // Int Immediate non-negative
4938 operand immU31()
4939 %{
4940 predicate(n->get_int() >= 0);
4941 match(ConI);
4942
4943 op_cost(0);
4944 format %{ %}
4945 interface(CONST_INTER);
4946 %}
4947
4948 // Pointer Immediate
4949 operand immP()
4950 %{
4951 match(ConP);
4952
4953 op_cost(10);
4954 format %{ %}
4955 interface(CONST_INTER);
4956 %}
4957
4958 // Null Pointer Immediate
4959 operand immP0()
4960 %{
4961 predicate(n->get_ptr() == 0);
4962 match(ConP);
4963
4964 op_cost(5);
4965 format %{ %}
4966 interface(CONST_INTER);
4967 %}
4968
4969 // Pointer Immediate
4970 operand immN() %{
4971 match(ConN);
4972
4973 op_cost(10);
4974 format %{ %}
4975 interface(CONST_INTER);
4976 %}
4977
4978 operand immNKlass() %{
4979 match(ConNKlass);
4980
4981 op_cost(10);
4982 format %{ %}
4983 interface(CONST_INTER);
4984 %}
4985
4986 // Null Pointer Immediate
4987 operand immN0() %{
4988 predicate(n->get_narrowcon() == 0);
4989 match(ConN);
4990
4991 op_cost(5);
4992 format %{ %}
4993 interface(CONST_INTER);
4994 %}
4995
4996 operand immP31()
4997 %{
4998 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4999 && (n->get_ptr() >> 31) == 0);
5000 match(ConP);
5001
5002 op_cost(5);
5003 format %{ %}
5004 interface(CONST_INTER);
5005 %}
5006
5007
5008 // Long Immediate
5009 operand immL()
5010 %{
5011 match(ConL);
5012
5013 op_cost(20);
5014 format %{ %}
5015 interface(CONST_INTER);
5016 %}
5017
5018 // Long Immediate 8-bit
5019 operand immL8()
5020 %{
5021 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
5022 match(ConL);
5023
5024 op_cost(5);
5025 format %{ %}
5026 interface(CONST_INTER);
5027 %}
5028
5029 // Long Immediate 32-bit unsigned
5030 operand immUL32()
5031 %{
5032 predicate(n->get_long() == (unsigned int) (n->get_long()));
5033 match(ConL);
5034
5035 op_cost(10);
5036 format %{ %}
5037 interface(CONST_INTER);
5038 %}
5039
5040 // Long Immediate 32-bit signed
5041 operand immL32()
5042 %{
5043 predicate(n->get_long() == (int) (n->get_long()));
5044 match(ConL);
5045
5046 op_cost(15);
5047 format %{ %}
5048 interface(CONST_INTER);
5049 %}
5050
5051 operand immL_Pow2()
5052 %{
5053 predicate(is_power_of_2((julong)n->get_long()));
5054 match(ConL);
5055
5056 op_cost(15);
5057 format %{ %}
5058 interface(CONST_INTER);
5059 %}
5060
5061 operand immL_NotPow2()
5062 %{
5063 predicate(is_power_of_2((julong)~n->get_long()));
5064 match(ConL);
5065
5066 op_cost(15);
5067 format %{ %}
5068 interface(CONST_INTER);
5069 %}
5070
5071 // Long Immediate zero
5072 operand immL0()
5073 %{
5074 predicate(n->get_long() == 0L);
5075 match(ConL);
5076
5077 op_cost(10);
5078 format %{ %}
5079 interface(CONST_INTER);
5080 %}
5081
5082 // Constant for increment
5083 operand immL1()
5084 %{
5085 predicate(n->get_long() == 1);
5086 match(ConL);
5087
5088 format %{ %}
5089 interface(CONST_INTER);
5090 %}
5091
5092 // Constant for decrement
5093 operand immL_M1()
5094 %{
5095 predicate(n->get_long() == -1);
5096 match(ConL);
5097
5098 format %{ %}
5099 interface(CONST_INTER);
5100 %}
5101
5102 // Long Immediate: low 32-bit mask
5103 operand immL_32bits()
5104 %{
5105 predicate(n->get_long() == 0xFFFFFFFFL);
5106 match(ConL);
5107 op_cost(20);
5108
5109 format %{ %}
5110 interface(CONST_INTER);
5111 %}
5112
5113 // Int Immediate: 2^n-1, positive
5114 operand immI_Pow2M1()
5115 %{
5116 predicate((n->get_int() > 0)
5117 && is_power_of_2((juint)n->get_int() + 1));
5118 match(ConI);
5119
5120 op_cost(20);
5121 format %{ %}
5122 interface(CONST_INTER);
5123 %}
5124
5125 // Float Immediate zero
5126 operand immF0()
5127 %{
5128 predicate(jint_cast(n->getf()) == 0);
5129 match(ConF);
5130
5131 op_cost(5);
5132 format %{ %}
5133 interface(CONST_INTER);
5134 %}
5135
5136 // Float Immediate
5137 operand immF()
5138 %{
5139 match(ConF);
5140
5141 op_cost(15);
5142 format %{ %}
5143 interface(CONST_INTER);
5144 %}
5145
5146 // Half Float Immediate
5147 operand immH()
5148 %{
5149 match(ConH);
5150
5151 op_cost(15);
5152 format %{ %}
5153 interface(CONST_INTER);
5154 %}
5155
5156 // Double Immediate zero
5157 operand immD0()
5158 %{
5159 predicate(jlong_cast(n->getd()) == 0);
5160 match(ConD);
5161
5162 op_cost(5);
5163 format %{ %}
5164 interface(CONST_INTER);
5165 %}
5166
5167 // Double Immediate
5168 operand immD()
5169 %{
5170 match(ConD);
5171
5172 op_cost(15);
5173 format %{ %}
5174 interface(CONST_INTER);
5175 %}
5176
5177 // Immediates for special shifts (sign extend)
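// For example, a short-to-int sign extension appears to the matcher as
// (RShiftI (LShiftI x immI_16) immI_16).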
5178
// Constants for shift distances
5180 operand immI_16()
5181 %{
5182 predicate(n->get_int() == 16);
5183 match(ConI);
5184
5185 format %{ %}
5186 interface(CONST_INTER);
5187 %}
5188
5189 operand immI_24()
5190 %{
5191 predicate(n->get_int() == 24);
5192 match(ConI);
5193
5194 format %{ %}
5195 interface(CONST_INTER);
5196 %}
5197
5198 // Constant for byte-wide masking
5199 operand immI_255()
5200 %{
5201 predicate(n->get_int() == 255);
5202 match(ConI);
5203
5204 format %{ %}
5205 interface(CONST_INTER);
5206 %}
5207
5208 // Constant for short-wide masking
5209 operand immI_65535()
5210 %{
5211 predicate(n->get_int() == 65535);
5212 match(ConI);
5213
5214 format %{ %}
5215 interface(CONST_INTER);
5216 %}
5217
5218 // Constant for byte-wide masking
5219 operand immL_255()
5220 %{
5221 predicate(n->get_long() == 255);
5222 match(ConL);
5223
5224 format %{ %}
5225 interface(CONST_INTER);
5226 %}
5227
5228 // Constant for short-wide masking
5229 operand immL_65535()
5230 %{
5231 predicate(n->get_long() == 65535);
5232 match(ConL);
5233
5234 format %{ %}
5235 interface(CONST_INTER);
5236 %}
5237
5238 // AOT Runtime Constants Address
5239 operand immAOTRuntimeConstantsAddress()
5240 %{
5241 // Check if the address is in the range of AOT Runtime Constants
5242 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5243 match(ConP);
5244
5245 op_cost(0);
5246 format %{ %}
5247 interface(CONST_INTER);
5248 %}
5249
5250 operand kReg()
5251 %{
5252 constraint(ALLOC_IN_RC(vectmask_reg));
5253 match(RegVectMask);
5254 format %{%}
5255 interface(REG_INTER);
5256 %}
5257
5258 // Register Operands
5259 // Integer Register
5260 operand rRegI()
5261 %{
5262 constraint(ALLOC_IN_RC(int_reg));
5263 match(RegI);
5264
5265 match(rax_RegI);
5266 match(rbx_RegI);
5267 match(rcx_RegI);
5268 match(rdx_RegI);
5269 match(rdi_RegI);
5270
5271 format %{ %}
5272 interface(REG_INTER);
5273 %}
5274
5275 // Special Registers
5276 operand rax_RegI()
5277 %{
5278 constraint(ALLOC_IN_RC(int_rax_reg));
5279 match(RegI);
5280 match(rRegI);
5281
5282 format %{ "RAX" %}
5283 interface(REG_INTER);
5284 %}
5285
5286 // Special Registers
5287 operand rbx_RegI()
5288 %{
5289 constraint(ALLOC_IN_RC(int_rbx_reg));
5290 match(RegI);
5291 match(rRegI);
5292
5293 format %{ "RBX" %}
5294 interface(REG_INTER);
5295 %}
5296
5297 operand rcx_RegI()
5298 %{
5299 constraint(ALLOC_IN_RC(int_rcx_reg));
5300 match(RegI);
5301 match(rRegI);
5302
5303 format %{ "RCX" %}
5304 interface(REG_INTER);
5305 %}
5306
5307 operand rdx_RegI()
5308 %{
5309 constraint(ALLOC_IN_RC(int_rdx_reg));
5310 match(RegI);
5311 match(rRegI);
5312
5313 format %{ "RDX" %}
5314 interface(REG_INTER);
5315 %}
5316
5317 operand rdi_RegI()
5318 %{
5319 constraint(ALLOC_IN_RC(int_rdi_reg));
5320 match(RegI);
5321 match(rRegI);
5322
5323 format %{ "RDI" %}
5324 interface(REG_INTER);
5325 %}
5326
5327 operand no_rax_rdx_RegI()
5328 %{
5329 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5330 match(RegI);
5331 match(rbx_RegI);
5332 match(rcx_RegI);
5333 match(rdi_RegI);
5334
5335 format %{ %}
5336 interface(REG_INTER);
5337 %}
5338
5339 operand no_rbp_r13_RegI()
5340 %{
5341 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5342 match(RegI);
5343 match(rRegI);
5344 match(rax_RegI);
5345 match(rbx_RegI);
5346 match(rcx_RegI);
5347 match(rdx_RegI);
5348 match(rdi_RegI);
5349
5350 format %{ %}
5351 interface(REG_INTER);
5352 %}
5353
5354 // Pointer Register
5355 operand any_RegP()
5356 %{
5357 constraint(ALLOC_IN_RC(any_reg));
5358 match(RegP);
5359 match(rax_RegP);
5360 match(rbx_RegP);
5361 match(rdi_RegP);
5362 match(rsi_RegP);
5363 match(rbp_RegP);
5364 match(r15_RegP);
5365 match(rRegP);
5366
5367 format %{ %}
5368 interface(REG_INTER);
5369 %}
5370
5371 operand rRegP()
5372 %{
5373 constraint(ALLOC_IN_RC(ptr_reg));
5374 match(RegP);
5375 match(rax_RegP);
5376 match(rbx_RegP);
5377 match(rdi_RegP);
5378 match(rsi_RegP);
5379 match(rbp_RegP); // See Q&A below about
5380 match(r15_RegP); // r15_RegP and rbp_RegP.
5381
5382 format %{ %}
5383 interface(REG_INTER);
5384 %}
5385
5386 operand rRegN() %{
5387 constraint(ALLOC_IN_RC(int_reg));
5388 match(RegN);
5389
5390 format %{ %}
5391 interface(REG_INTER);
5392 %}
5393
5394 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5395 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be chosen
// by the allocator as an output register.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5403 // result, RBP is not included in the output of the instruction either.
5404
5405 // This operand is not allowed to use RBP even if
5406 // RBP is not used to hold the frame pointer.
5407 operand no_rbp_RegP()
5408 %{
5409 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5410 match(RegP);
5411 match(rbx_RegP);
5412 match(rsi_RegP);
5413 match(rdi_RegP);
5414
5415 format %{ %}
5416 interface(REG_INTER);
5417 %}
5418
5419 // Special Registers
5420 // Return a pointer value
5421 operand rax_RegP()
5422 %{
5423 constraint(ALLOC_IN_RC(ptr_rax_reg));
5424 match(RegP);
5425 match(rRegP);
5426
5427 format %{ %}
5428 interface(REG_INTER);
5429 %}
5430
5431 // Special Registers
5432 // Return a compressed pointer value
5433 operand rax_RegN()
5434 %{
5435 constraint(ALLOC_IN_RC(int_rax_reg));
5436 match(RegN);
5437 match(rRegN);
5438
5439 format %{ %}
5440 interface(REG_INTER);
5441 %}
5442
5443 // Used in AtomicAdd
5444 operand rbx_RegP()
5445 %{
5446 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5447 match(RegP);
5448 match(rRegP);
5449
5450 format %{ %}
5451 interface(REG_INTER);
5452 %}
5453
5454 operand rsi_RegP()
5455 %{
5456 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5457 match(RegP);
5458 match(rRegP);
5459
5460 format %{ %}
5461 interface(REG_INTER);
5462 %}
5463
5464 operand rbp_RegP()
5465 %{
5466 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5467 match(RegP);
5468 match(rRegP);
5469
5470 format %{ %}
5471 interface(REG_INTER);
5472 %}
5473
5474 // Used in rep stosq
5475 operand rdi_RegP()
5476 %{
5477 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5478 match(RegP);
5479 match(rRegP);
5480
5481 format %{ %}
5482 interface(REG_INTER);
5483 %}
5484
5485 operand r15_RegP()
5486 %{
5487 constraint(ALLOC_IN_RC(ptr_r15_reg));
5488 match(RegP);
5489 match(rRegP);
5490
5491 format %{ %}
5492 interface(REG_INTER);
5493 %}
5494
5495 operand rRegL()
5496 %{
5497 constraint(ALLOC_IN_RC(long_reg));
5498 match(RegL);
5499 match(rax_RegL);
5500 match(rdx_RegL);
5501
5502 format %{ %}
5503 interface(REG_INTER);
5504 %}
5505
5506 // Special Registers
5507 operand no_rax_rdx_RegL()
5508 %{
5509 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5510 match(RegL);
5511 match(rRegL);
5512
5513 format %{ %}
5514 interface(REG_INTER);
5515 %}
5516
5517 operand rax_RegL()
5518 %{
5519 constraint(ALLOC_IN_RC(long_rax_reg));
5520 match(RegL);
5521 match(rRegL);
5522
5523 format %{ "RAX" %}
5524 interface(REG_INTER);
5525 %}
5526
5527 operand rcx_RegL()
5528 %{
5529 constraint(ALLOC_IN_RC(long_rcx_reg));
5530 match(RegL);
5531 match(rRegL);
5532
5533 format %{ %}
5534 interface(REG_INTER);
5535 %}
5536
5537 operand rdx_RegL()
5538 %{
5539 constraint(ALLOC_IN_RC(long_rdx_reg));
5540 match(RegL);
5541 match(rRegL);
5542
5543 format %{ %}
5544 interface(REG_INTER);
5545 %}
5546
5547 operand r11_RegL()
5548 %{
5549 constraint(ALLOC_IN_RC(long_r11_reg));
5550 match(RegL);
5551 match(rRegL);
5552
5553 format %{ %}
5554 interface(REG_INTER);
5555 %}
5556
5557 operand no_rbp_r13_RegL()
5558 %{
5559 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5560 match(RegL);
5561 match(rRegL);
5562 match(rax_RegL);
5563 match(rcx_RegL);
5564 match(rdx_RegL);
5565
5566 format %{ %}
5567 interface(REG_INTER);
5568 %}
5569
5570 // Flags register, used as output of compare instructions
5571 operand rFlagsReg()
5572 %{
5573 constraint(ALLOC_IN_RC(int_flags));
5574 match(RegFlags);
5575
5576 format %{ "RFLAGS" %}
5577 interface(REG_INTER);
5578 %}
5579
5580 // Flags register, used as output of FLOATING POINT compare instructions
5581 operand rFlagsRegU()
5582 %{
5583 constraint(ALLOC_IN_RC(int_flags));
5584 match(RegFlags);
5585
5586 format %{ "RFLAGS_U" %}
5587 interface(REG_INTER);
5588 %}
5589
5590 operand rFlagsRegUCF() %{
5591 constraint(ALLOC_IN_RC(int_flags));
5592 match(RegFlags);
5593 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5594
5595 format %{ "RFLAGS_U_CF" %}
5596 interface(REG_INTER);
5597 %}
5598
5599 operand rFlagsRegUCFE() %{
5600 constraint(ALLOC_IN_RC(int_flags));
5601 match(RegFlags);
5602 predicate(UseAPX && VM_Version::supports_avx10_2());
5603
5604 format %{ "RFLAGS_U_CFE" %}
5605 interface(REG_INTER);
5606 %}
5607
5608 // Float register operands
5609 operand regF() %{
5610 constraint(ALLOC_IN_RC(float_reg));
5611 match(RegF);
5612
5613 format %{ %}
5614 interface(REG_INTER);
5615 %}
5616
5617 // Float register operands
5618 operand legRegF() %{
5619 constraint(ALLOC_IN_RC(float_reg_legacy));
5620 match(RegF);
5621
5622 format %{ %}
5623 interface(REG_INTER);
5624 %}
5625
5626 // Float register operands
5627 operand vlRegF() %{
5628 constraint(ALLOC_IN_RC(float_reg_vl));
5629 match(RegF);
5630
5631 format %{ %}
5632 interface(REG_INTER);
5633 %}
5634
5635 // Double register operands
5636 operand regD() %{
5637 constraint(ALLOC_IN_RC(double_reg));
5638 match(RegD);
5639
5640 format %{ %}
5641 interface(REG_INTER);
5642 %}
5643
5644 // Double register operands
5645 operand legRegD() %{
5646 constraint(ALLOC_IN_RC(double_reg_legacy));
5647 match(RegD);
5648
5649 format %{ %}
5650 interface(REG_INTER);
5651 %}
5652
5653 // Double register operands
5654 operand vlRegD() %{
5655 constraint(ALLOC_IN_RC(double_reg_vl));
5656 match(RegD);
5657
5658 format %{ %}
5659 interface(REG_INTER);
5660 %}
5661
5662 //----------Memory Operands----------------------------------------------------
5663 // Direct Memory Operand
5664 // operand direct(immP addr)
5665 // %{
5666 // match(addr);
5667
5668 // format %{ "[$addr]" %}
5669 // interface(MEMORY_INTER) %{
5670 // base(0xFFFFFFFF);
5671 // index(0x4);
5672 // scale(0x0);
5673 // disp($addr);
5674 // %}
5675 // %}
5676
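// In the MEMORY_INTER descriptions below, index(0x4) means "no index
// register": SIB encoding 4 selects RSP, which cannot serve as an index.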
5677 // Indirect Memory Operand
5678 operand indirect(any_RegP reg)
5679 %{
5680 constraint(ALLOC_IN_RC(ptr_reg));
5681 match(reg);
5682
5683 format %{ "[$reg]" %}
5684 interface(MEMORY_INTER) %{
5685 base($reg);
5686 index(0x4);
5687 scale(0x0);
5688 disp(0x0);
5689 %}
5690 %}
5691
5692 // Indirect Memory Plus Short Offset Operand
5693 operand indOffset8(any_RegP reg, immL8 off)
5694 %{
5695 constraint(ALLOC_IN_RC(ptr_reg));
5696 match(AddP reg off);
5697
5698 format %{ "[$reg + $off (8-bit)]" %}
5699 interface(MEMORY_INTER) %{
5700 base($reg);
5701 index(0x4);
5702 scale(0x0);
5703 disp($off);
5704 %}
5705 %}
5706
5707 // Indirect Memory Plus Long Offset Operand
5708 operand indOffset32(any_RegP reg, immL32 off)
5709 %{
5710 constraint(ALLOC_IN_RC(ptr_reg));
5711 match(AddP reg off);
5712
5713 format %{ "[$reg + $off (32-bit)]" %}
5714 interface(MEMORY_INTER) %{
5715 base($reg);
5716 index(0x4);
5717 scale(0x0);
5718 disp($off);
5719 %}
5720 %}
5721
5722 // Indirect Memory Plus Index Register Plus Offset Operand
5723 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5724 %{
5725 constraint(ALLOC_IN_RC(ptr_reg));
5726 match(AddP (AddP reg lreg) off);
5727
5728 op_cost(10);
5729 format %{"[$reg + $off + $lreg]" %}
5730 interface(MEMORY_INTER) %{
5731 base($reg);
5732 index($lreg);
5733 scale(0x0);
5734 disp($off);
5735 %}
5736 %}
5737
// Indirect Memory Plus Index Register Operand
5739 operand indIndex(any_RegP reg, rRegL lreg)
5740 %{
5741 constraint(ALLOC_IN_RC(ptr_reg));
5742 match(AddP reg lreg);
5743
5744 op_cost(10);
5745 format %{"[$reg + $lreg]" %}
5746 interface(MEMORY_INTER) %{
5747 base($reg);
5748 index($lreg);
5749 scale(0x0);
5750 disp(0x0);
5751 %}
5752 %}
5753
5754 // Indirect Memory Times Scale Plus Index Register
5755 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5756 %{
5757 constraint(ALLOC_IN_RC(ptr_reg));
5758 match(AddP reg (LShiftL lreg scale));
5759
5760 op_cost(10);
5761 format %{"[$reg + $lreg << $scale]" %}
5762 interface(MEMORY_INTER) %{
5763 base($reg);
5764 index($lreg);
5765 scale($scale);
5766 disp(0x0);
5767 %}
5768 %}
5769
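// Indirect Memory Times Scale Plus Positive Index Register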
5770 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5771 %{
5772 constraint(ALLOC_IN_RC(ptr_reg));
5773 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5774 match(AddP reg (LShiftL (ConvI2L idx) scale));
5775
5776 op_cost(10);
5777 format %{"[$reg + pos $idx << $scale]" %}
5778 interface(MEMORY_INTER) %{
5779 base($reg);
5780 index($idx);
5781 scale($scale);
5782 disp(0x0);
5783 %}
5784 %}
5785
5786 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5787 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5788 %{
5789 constraint(ALLOC_IN_RC(ptr_reg));
5790 match(AddP (AddP reg (LShiftL lreg scale)) off);
5791
5792 op_cost(10);
5793 format %{"[$reg + $off + $lreg << $scale]" %}
5794 interface(MEMORY_INTER) %{
5795 base($reg);
5796 index($lreg);
5797 scale($scale);
5798 disp($off);
5799 %}
5800 %}
5801
5802 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5803 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5804 %{
5805 constraint(ALLOC_IN_RC(ptr_reg));
5806 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5807 match(AddP (AddP reg (ConvI2L idx)) off);
5808
5809 op_cost(10);
5810 format %{"[$reg + $off + $idx]" %}
5811 interface(MEMORY_INTER) %{
5812 base($reg);
5813 index($idx);
5814 scale(0x0);
5815 disp($off);
5816 %}
5817 %}
5818
5819 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5820 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5821 %{
5822 constraint(ALLOC_IN_RC(ptr_reg));
5823 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5824 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5825
5826 op_cost(10);
5827 format %{"[$reg + $off + $idx << $scale]" %}
5828 interface(MEMORY_INTER) %{
5829 base($reg);
5830 index($idx);
5831 scale($scale);
5832 disp($off);
5833 %}
5834 %}
5835
5836 // Indirect Narrow Oop Operand
5837 operand indCompressedOop(rRegN reg) %{
5838 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5839 constraint(ALLOC_IN_RC(ptr_reg));
5840 match(DecodeN reg);
5841
5842 op_cost(10);
5843 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5844 interface(MEMORY_INTER) %{
5845 base(0xc); // R12
5846 index($reg);
5847 scale(0x3);
5848 disp(0x0);
5849 %}
5850 %}
5851
5852 // Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
5855 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5856 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5857 constraint(ALLOC_IN_RC(ptr_reg));
5858 match(AddP (DecodeN reg) off);
5859
5860 op_cost(10);
5861 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5862 interface(MEMORY_INTER) %{
5863 base(0xc); // R12
5864 index($reg);
5865 scale(0x3);
5866 disp($off);
5867 %}
5868 %}
5869
5870 // Indirect Memory Operand
5871 operand indirectNarrow(rRegN reg)
5872 %{
5873 predicate(CompressedOops::shift() == 0);
5874 constraint(ALLOC_IN_RC(ptr_reg));
5875 match(DecodeN reg);
5876
5877 format %{ "[$reg]" %}
5878 interface(MEMORY_INTER) %{
5879 base($reg);
5880 index(0x4);
5881 scale(0x0);
5882 disp(0x0);
5883 %}
5884 %}
5885
5886 // Indirect Memory Plus Short Offset Operand
5887 operand indOffset8Narrow(rRegN reg, immL8 off)
5888 %{
5889 predicate(CompressedOops::shift() == 0);
5890 constraint(ALLOC_IN_RC(ptr_reg));
5891 match(AddP (DecodeN reg) off);
5892
5893 format %{ "[$reg + $off (8-bit)]" %}
5894 interface(MEMORY_INTER) %{
5895 base($reg);
5896 index(0x4);
5897 scale(0x0);
5898 disp($off);
5899 %}
5900 %}
5901
5902 // Indirect Memory Plus Long Offset Operand
5903 operand indOffset32Narrow(rRegN reg, immL32 off)
5904 %{
5905 predicate(CompressedOops::shift() == 0);
5906 constraint(ALLOC_IN_RC(ptr_reg));
5907 match(AddP (DecodeN reg) off);
5908
5909 format %{ "[$reg + $off (32-bit)]" %}
5910 interface(MEMORY_INTER) %{
5911 base($reg);
5912 index(0x4);
5913 scale(0x0);
5914 disp($off);
5915 %}
5916 %}
5917
5918 // Indirect Memory Plus Index Register Plus Offset Operand
5919 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5920 %{
5921 predicate(CompressedOops::shift() == 0);
5922 constraint(ALLOC_IN_RC(ptr_reg));
5923 match(AddP (AddP (DecodeN reg) lreg) off);
5924
5925 op_cost(10);
5926 format %{"[$reg + $off + $lreg]" %}
5927 interface(MEMORY_INTER) %{
5928 base($reg);
5929 index($lreg);
5930 scale(0x0);
5931 disp($off);
5932 %}
5933 %}
5934
// Indirect Memory Plus Index Register Operand
5936 operand indIndexNarrow(rRegN reg, rRegL lreg)
5937 %{
5938 predicate(CompressedOops::shift() == 0);
5939 constraint(ALLOC_IN_RC(ptr_reg));
5940 match(AddP (DecodeN reg) lreg);
5941
5942 op_cost(10);
5943 format %{"[$reg + $lreg]" %}
5944 interface(MEMORY_INTER) %{
5945 base($reg);
5946 index($lreg);
5947 scale(0x0);
5948 disp(0x0);
5949 %}
5950 %}
5951
5952 // Indirect Memory Times Scale Plus Index Register
5953 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5954 %{
5955 predicate(CompressedOops::shift() == 0);
5956 constraint(ALLOC_IN_RC(ptr_reg));
5957 match(AddP (DecodeN reg) (LShiftL lreg scale));
5958
5959 op_cost(10);
5960 format %{"[$reg + $lreg << $scale]" %}
5961 interface(MEMORY_INTER) %{
5962 base($reg);
5963 index($lreg);
5964 scale($scale);
5965 disp(0x0);
5966 %}
5967 %}
5968
5969 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5970 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5971 %{
5972 predicate(CompressedOops::shift() == 0);
5973 constraint(ALLOC_IN_RC(ptr_reg));
5974 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5975
5976 op_cost(10);
5977 format %{"[$reg + $off + $lreg << $scale]" %}
5978 interface(MEMORY_INTER) %{
5979 base($reg);
5980 index($lreg);
5981 scale($scale);
5982 disp($off);
5983 %}
5984 %}
5985
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5987 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5988 %{
5989 constraint(ALLOC_IN_RC(ptr_reg));
5990 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5991 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5992
5993 op_cost(10);
5994 format %{"[$reg + $off + $idx]" %}
5995 interface(MEMORY_INTER) %{
5996 base($reg);
5997 index($idx);
5998 scale(0x0);
5999 disp($off);
6000 %}
6001 %}
6002
6003 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
6004 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
6005 %{
6006 constraint(ALLOC_IN_RC(ptr_reg));
6007 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
6008 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
6009
6010 op_cost(10);
6011 format %{"[$reg + $off + $idx << $scale]" %}
6012 interface(MEMORY_INTER) %{
6013 base($reg);
6014 index($idx);
6015 scale($scale);
6016 disp($off);
6017 %}
6018 %}
6019
6020 //----------Special Memory Operands--------------------------------------------
6021 // Stack Slot Operand - This operand is used for loading and storing temporary
6022 // values on the stack where a match requires a value to
6023 // flow through memory.
6024 operand stackSlotP(sRegP reg)
6025 %{
6026 constraint(ALLOC_IN_RC(stack_slots));
6027 // No match rule because this operand is only generated in matching
6028
6029 format %{ "[$reg]" %}
6030 interface(MEMORY_INTER) %{
6031 base(0x4); // RSP
6032 index(0x4); // No Index
6033 scale(0x0); // No Scale
6034 disp($reg); // Stack Offset
6035 %}
6036 %}
6037
6038 operand stackSlotI(sRegI reg)
6039 %{
6040 constraint(ALLOC_IN_RC(stack_slots));
6041 // No match rule because this operand is only generated in matching
6042
6043 format %{ "[$reg]" %}
6044 interface(MEMORY_INTER) %{
6045 base(0x4); // RSP
6046 index(0x4); // No Index
6047 scale(0x0); // No Scale
6048 disp($reg); // Stack Offset
6049 %}
6050 %}
6051
6052 operand stackSlotF(sRegF reg)
6053 %{
6054 constraint(ALLOC_IN_RC(stack_slots));
6055 // No match rule because this operand is only generated in matching
6056
6057 format %{ "[$reg]" %}
6058 interface(MEMORY_INTER) %{
6059 base(0x4); // RSP
6060 index(0x4); // No Index
6061 scale(0x0); // No Scale
6062 disp($reg); // Stack Offset
6063 %}
6064 %}
6065
6066 operand stackSlotD(sRegD reg)
6067 %{
6068 constraint(ALLOC_IN_RC(stack_slots));
6069 // No match rule because this operand is only generated in matching
6070
6071 format %{ "[$reg]" %}
6072 interface(MEMORY_INTER) %{
6073 base(0x4); // RSP
6074 index(0x4); // No Index
6075 scale(0x0); // No Scale
6076 disp($reg); // Stack Offset
6077 %}
6078 %}
6079 operand stackSlotL(sRegL reg)
6080 %{
6081 constraint(ALLOC_IN_RC(stack_slots));
6082 // No match rule because this operand is only generated in matching
6083
6084 format %{ "[$reg]" %}
6085 interface(MEMORY_INTER) %{
6086 base(0x4); // RSP
6087 index(0x4); // No Index
6088 scale(0x0); // No Scale
6089 disp($reg); // Stack Offset
6090 %}
6091 %}
6092
6093 //----------Conditional Branch Operands----------------------------------------
6094 // Comparison Op - This is the operation of the comparison, and is limited to
6095 // the following set of codes:
6096 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6097 //
6098 // Other attributes of the comparison, such as unsignedness, are specified
6099 // by the comparison instruction that sets a condition code flags register.
6100 // That result is represented by a flags operand whose subtype is appropriate
6101 // to the unsignedness (etc.) of the comparison.
6102 //
6103 // Later, the instruction which matches both the Comparison Op (a Bool) and
6104 // the flags (produced by the Cmp) specifies the coding of the comparison op
6105 // by matching a specific subtype of Bool operand below, such as cmpOpU.
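// For example, cmpOpU below encodes 'less' as 0x2, selecting the x86
// unsigned "b" (below) condition in the emitted jcc/cmov/set instructions.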
6106
6107 // Comparison Code
6108 operand cmpOp()
6109 %{
6110 match(Bool);
6111
6112 format %{ "" %}
6113 interface(COND_INTER) %{
6114 equal(0x4, "e");
6115 not_equal(0x5, "ne");
6116 less(0xc, "l");
6117 greater_equal(0xd, "ge");
6118 less_equal(0xe, "le");
6119 greater(0xf, "g");
6120 overflow(0x0, "o");
6121 no_overflow(0x1, "no");
6122 %}
6123 %}
6124
6125 // Comparison Code, unsigned compare. Used by FP also, with
6126 // C2 (unordered) turned into GT or LT already. The other bits
6127 // C0 and C3 are turned into Carry & Zero flags.
6128 operand cmpOpU()
6129 %{
6130 match(Bool);
6131
6132 format %{ "" %}
6133 interface(COND_INTER) %{
6134 equal(0x4, "e");
6135 not_equal(0x5, "ne");
6136 less(0x2, "b");
6137 greater_equal(0x3, "ae");
6138 less_equal(0x6, "be");
6139 greater(0x7, "a");
6140 overflow(0x0, "o");
6141 no_overflow(0x1, "no");
6142 %}
6143 %}
6144
6145
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6149 operand cmpOpUCF() %{
6150 match(Bool);
6151 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6152 (n->as_Bool()->_test._test == BoolTest::lt ||
6153 n->as_Bool()->_test._test == BoolTest::ge ||
6154 n->as_Bool()->_test._test == BoolTest::le ||
6155 n->as_Bool()->_test._test == BoolTest::gt ||
6156 n->in(1)->in(1) == n->in(1)->in(2)));
6157 format %{ "" %}
6158 interface(COND_INTER) %{
6159 equal(0xb, "np");
6160 not_equal(0xa, "p");
6161 less(0x2, "b");
6162 greater_equal(0x3, "ae");
6163 less_equal(0x6, "be");
6164 greater(0x7, "a");
6165 overflow(0x0, "o");
6166 no_overflow(0x1, "no");
6167 %}
6168 %}
6169
6170
6171 // Floating comparisons that can be fixed up with extra conditional jumps
6172 operand cmpOpUCF2() %{
6173 match(Bool);
6174 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6175 (n->as_Bool()->_test._test == BoolTest::ne ||
6176 n->as_Bool()->_test._test == BoolTest::eq) &&
6177 n->in(1)->in(1) != n->in(1)->in(2));
6178 format %{ "" %}
6179 interface(COND_INTER) %{
6180 equal(0x4, "e");
6181 not_equal(0x5, "ne");
6182 less(0x2, "b");
6183 greater_equal(0x3, "ae");
6184 less_equal(0x6, "be");
6185 greater(0x7, "a");
6186 overflow(0x0, "o");
6187 no_overflow(0x1, "no");
6188 %}
6189 %}
6190
6191
// Floating point comparisons that set condition flags to test more directly.
6193 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6194 // are used for L (<) and LE (<=) conditions. It's important to convert these
6195 // latter conditions to ones that use unsigned tests before passing into an
6196 // instruction because the preceding comparison might be based on a three way
6197 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6198 operand cmpOpUCFE()
6199 %{
6200 match(Bool);
6201 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6202 (n->as_Bool()->_test._test == BoolTest::ne ||
6203 n->as_Bool()->_test._test == BoolTest::eq ||
6204 n->as_Bool()->_test._test == BoolTest::lt ||
6205 n->as_Bool()->_test._test == BoolTest::ge ||
6206 n->as_Bool()->_test._test == BoolTest::le ||
6207 n->as_Bool()->_test._test == BoolTest::gt));
6208
6209 format %{ "" %}
6210 interface(COND_INTER) %{
6211 equal(0x4, "e");
6212 not_equal(0x5, "ne");
6213 less(0x2, "b");
6214 greater_equal(0x3, "ae");
6215 less_equal(0x6, "be");
6216 greater(0x7, "a");
6217 overflow(0x0, "o");
6218 no_overflow(0x1, "no");
6219 %}
6220 %}
6221
// Operands for bound floating point register arguments
6223 operand rxmm0() %{
6224 constraint(ALLOC_IN_RC(xmm0_reg));
6225 match(VecX);
6226 format%{%}
6227 interface(REG_INTER);
6228 %}
6229
6230 // Vectors
6231
6232 // Dummy generic vector class. Should be used for all vector operands.
6233 // Replaced with vec[SDXYZ] during post-selection pass.
6234 operand vec() %{
6235 constraint(ALLOC_IN_RC(dynamic));
6236 match(VecX);
6237 match(VecY);
6238 match(VecZ);
6239 match(VecS);
6240 match(VecD);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
6246 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6247 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6248 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6249 // runtime code generation via reg_class_dynamic.
6250 operand legVec() %{
6251 constraint(ALLOC_IN_RC(dynamic));
6252 match(VecX);
6253 match(VecY);
6254 match(VecZ);
6255 match(VecS);
6256 match(VecD);
6257
6258 format %{ %}
6259 interface(REG_INTER);
6260 %}
6261
6262 // Replaces vec during post-selection cleanup. See above.
6263 operand vecS() %{
6264 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6265 match(VecS);
6266
6267 format %{ %}
6268 interface(REG_INTER);
6269 %}
6270
6271 // Replaces legVec during post-selection cleanup. See above.
6272 operand legVecS() %{
6273 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6274 match(VecS);
6275
6276 format %{ %}
6277 interface(REG_INTER);
6278 %}
6279
6280 // Replaces vec during post-selection cleanup. See above.
6281 operand vecD() %{
6282 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6283 match(VecD);
6284
6285 format %{ %}
6286 interface(REG_INTER);
6287 %}
6288
6289 // Replaces legVec during post-selection cleanup. See above.
6290 operand legVecD() %{
6291 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6292 match(VecD);
6293
6294 format %{ %}
6295 interface(REG_INTER);
6296 %}
6297
6298 // Replaces vec during post-selection cleanup. See above.
6299 operand vecX() %{
6300 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6301 match(VecX);
6302
6303 format %{ %}
6304 interface(REG_INTER);
6305 %}
6306
6307 // Replaces legVec during post-selection cleanup. See above.
6308 operand legVecX() %{
6309 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6310 match(VecX);
6311
6312 format %{ %}
6313 interface(REG_INTER);
6314 %}
6315
6316 // Replaces vec during post-selection cleanup. See above.
6317 operand vecY() %{
6318 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6319 match(VecY);
6320
6321 format %{ %}
6322 interface(REG_INTER);
6323 %}
6324
6325 // Replaces legVec during post-selection cleanup. See above.
6326 operand legVecY() %{
6327 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6328 match(VecY);
6329
6330 format %{ %}
6331 interface(REG_INTER);
6332 %}
6333
6334 // Replaces vec during post-selection cleanup. See above.
6335 operand vecZ() %{
6336 constraint(ALLOC_IN_RC(vectorz_reg));
6337 match(VecZ);
6338
6339 format %{ %}
6340 interface(REG_INTER);
6341 %}
6342
6343 // Replaces legVec during post-selection cleanup. See above.
6344 operand legVecZ() %{
6345 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6346 match(VecZ);
6347
6348 format %{ %}
6349 interface(REG_INTER);
6350 %}
6351
6352 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6354 // instruction definitions by not requiring the AD writer to specify separate
6355 // instructions for every form of operand when the instruction accepts
6356 // multiple operand types with the same basic encoding and format. The classic
6357 // case of this is memory operands.
6358
6359 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6360 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6361 indCompressedOop, indCompressedOopOffset,
6362 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6363 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6364 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
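// An instruction declaring a 'memory' operand thereby accepts any of the
// addressing forms above; operand costs guide the matcher's choice among them.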
6365
6366 //----------PIPELINE-----------------------------------------------------------
6367 // Rules which define the behavior of the target architectures pipeline.
6368 pipeline %{
6369
6370 //----------ATTRIBUTES---------------------------------------------------------
6371 attributes %{
variable_size_instructions; // Variable-sized instructions
6373 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6375 instruction_fetch_unit_size = 16; // The processor fetches one line
6376 instruction_fetch_units = 1; // of 16 bytes
6377 %}
6378
6379 //----------RESOURCES----------------------------------------------------------
6380 // Resources are the functional units available to the machine
6381
6382 // Generic P2/P3 pipeline
6383 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6384 // 3 instructions decoded per cycle.
6385 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
6387 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6388 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6389 BR, FPU,
6390 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6391
6392 //----------PIPELINE DESCRIPTION-----------------------------------------------
6393 // Pipeline Description specifies the stages in the machine's pipeline
6394
6395 // Generic P2/P3 pipeline
6396 pipe_desc(S0, S1, S2, S3, S4, S5);
6397
6398 //----------PIPELINE CLASSES---------------------------------------------------
6399 // Pipeline Classes describe the stages in which input and output are
6400 // referenced by the hardware pipeline.
6401
6402 // Naming convention: ialu or fpu
6403 // Then: _reg
6404 // Then: _reg if there is a 2nd register
6405 // Then: _long if it's a pair of instructions implementing a long
6406 // Then: _fat if it requires the big decoder
6407 // Or: _mem if it requires the big decoder and a memory unit.
6408
6409 // Integer ALU reg operation
6410 pipe_class ialu_reg(rRegI dst)
6411 %{
6412 single_instruction;
6413 dst : S4(write);
6414 dst : S3(read);
6415 DECODE : S0; // any decoder
6416 ALU : S3; // any alu
6417 %}
6418
6419 // Long ALU reg operation
6420 pipe_class ialu_reg_long(rRegL dst)
6421 %{
6422 instruction_count(2);
6423 dst : S4(write);
6424 dst : S3(read);
6425 DECODE : S0(2); // any 2 decoders
6426 ALU : S3(2); // both alus
6427 %}
6428
6429 // Integer ALU reg operation using big decoder
6430 pipe_class ialu_reg_fat(rRegI dst)
6431 %{
6432 single_instruction;
6433 dst : S4(write);
6434 dst : S3(read);
6435 D0 : S0; // big decoder only
6436 ALU : S3; // any alu
6437 %}
6438
6439 // Integer ALU reg-reg operation
6440 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6441 %{
6442 single_instruction;
6443 dst : S4(write);
6444 src : S3(read);
6445 DECODE : S0; // any decoder
6446 ALU : S3; // any alu
6447 %}
6448
6449 // Integer ALU reg-reg operation
6450 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6451 %{
6452 single_instruction;
6453 dst : S4(write);
6454 src : S3(read);
6455 D0 : S0; // big decoder only
6456 ALU : S3; // any alu
6457 %}
6458
6459 // Integer ALU reg-mem operation
6460 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6461 %{
6462 single_instruction;
6463 dst : S5(write);
6464 mem : S3(read);
6465 D0 : S0; // big decoder only
6466 ALU : S4; // any alu
6467 MEM : S3; // any mem
6468 %}
6469
6470 // Integer mem operation (prefetch)
6471 pipe_class ialu_mem(memory mem)
6472 %{
6473 single_instruction;
6474 mem : S3(read);
6475 D0 : S0; // big decoder only
6476 MEM : S3; // any mem
6477 %}
6478
6479 // Integer Store to Memory
6480 pipe_class ialu_mem_reg(memory mem, rRegI src)
6481 %{
6482 single_instruction;
6483 mem : S3(read);
6484 src : S5(read);
6485 D0 : S0; // big decoder only
6486 ALU : S4; // any alu
6487 MEM : S3;
6488 %}
6489
6490 // // Long Store to Memory
6491 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6492 // %{
6493 // instruction_count(2);
6494 // mem : S3(read);
6495 // src : S5(read);
6496 // D0 : S0(2); // big decoder only; twice
6497 // ALU : S4(2); // any 2 alus
6498 // MEM : S3(2); // Both mems
6499 // %}
6500
6501 // Integer Store to Memory
6502 pipe_class ialu_mem_imm(memory mem)
6503 %{
6504 single_instruction;
6505 mem : S3(read);
6506 D0 : S0; // big decoder only
6507 ALU : S4; // any alu
6508 MEM : S3;
6509 %}
6510
6511 // Integer ALU0 reg-reg operation
6512 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6513 %{
6514 single_instruction;
6515 dst : S4(write);
6516 src : S3(read);
6517 D0 : S0; // Big decoder only
6518 ALU0 : S3; // only alu0
6519 %}
6520
6521 // Integer ALU0 reg-mem operation
6522 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6523 %{
6524 single_instruction;
6525 dst : S5(write);
6526 mem : S3(read);
6527 D0 : S0; // big decoder only
6528 ALU0 : S4; // ALU0 only
6529 MEM : S3; // any mem
6530 %}
6531
// Integer ALU reg-reg compare/test operation (writes flags only)
6533 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6534 %{
6535 single_instruction;
6536 cr : S4(write);
6537 src1 : S3(read);
6538 src2 : S3(read);
6539 DECODE : S0; // any decoder
6540 ALU : S3; // any alu
6541 %}
6542
6543 // Integer ALU reg-imm operation
6544 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6545 %{
6546 single_instruction;
6547 cr : S4(write);
6548 src1 : S3(read);
6549 DECODE : S0; // any decoder
6550 ALU : S3; // any alu
6551 %}
6552
6553 // Integer ALU reg-mem operation
6554 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6555 %{
6556 single_instruction;
6557 cr : S4(write);
6558 src1 : S3(read);
6559 src2 : S3(read);
6560 D0 : S0; // big decoder only
6561 ALU : S4; // any alu
6562 MEM : S3;
6563 %}
6564
// Compare-less-than mask idiom; expands to several instructions
6566 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6567 %{
6568 instruction_count(4);
6569 y : S4(read);
6570 q : S3(read);
6571 p : S3(read);
6572 DECODE : S0(4); // any decoder
6573 %}
6574
6575 // Conditional move reg-reg
6576 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6577 %{
6578 single_instruction;
6579 dst : S4(write);
6580 src : S3(read);
6581 cr : S3(read);
6582 DECODE : S0; // any decoder
6583 %}
6584
6585 // Conditional move reg-mem
6586 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6587 %{
6588 single_instruction;
6589 dst : S4(write);
6590 src : S3(read);
6591 cr : S3(read);
6592 DECODE : S0; // any decoder
6593 MEM : S3;
6594 %}
6595
6596 // Conditional move reg-reg long
6597 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6598 %{
6599 single_instruction;
6600 dst : S4(write);
6601 src : S3(read);
6602 cr : S3(read);
6603 DECODE : S0(2); // any 2 decoders
6604 %}
6605
// Float reg operation
6607 pipe_class fpu_reg(regD dst)
6608 %{
6609 instruction_count(2);
6610 dst : S3(read);
6611 DECODE : S0(2); // any 2 decoders
6612 FPU : S3;
6613 %}
6614
6615 // Float reg-reg operation
6616 pipe_class fpu_reg_reg(regD dst, regD src)
6617 %{
6618 instruction_count(2);
6619 dst : S4(write);
6620 src : S3(read);
6621 DECODE : S0(2); // any 2 decoders
6622 FPU : S3;
6623 %}
6624
6625 // Float reg-reg operation
6626 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6627 %{
6628 instruction_count(3);
6629 dst : S4(write);
6630 src1 : S3(read);
6631 src2 : S3(read);
6632 DECODE : S0(3); // any 3 decoders
6633 FPU : S3(2);
6634 %}
6635
6636 // Float reg-reg operation
6637 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6638 %{
6639 instruction_count(4);
6640 dst : S4(write);
6641 src1 : S3(read);
6642 src2 : S3(read);
6643 src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
6645 FPU : S3(2);
6646 %}
6647
// Float reg-mem-reg-reg operation
6649 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6650 %{
6651 instruction_count(4);
6652 dst : S4(write);
6653 src1 : S3(read);
6654 src2 : S3(read);
6655 src3 : S3(read);
6656 DECODE : S1(3); // any 3 decoders
6657 D0 : S0; // Big decoder only
6658 FPU : S3(2);
6659 MEM : S3;
6660 %}
6661
6662 // Float reg-mem operation
6663 pipe_class fpu_reg_mem(regD dst, memory mem)
6664 %{
6665 instruction_count(2);
6666 dst : S5(write);
6667 mem : S3(read);
6668 D0 : S0; // big decoder only
6669 DECODE : S1; // any decoder for FPU POP
6670 FPU : S4;
6671 MEM : S3; // any mem
6672 %}
6673
6674 // Float reg-mem operation
6675 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6676 %{
6677 instruction_count(3);
6678 dst : S5(write);
6679 src1 : S3(read);
6680 mem : S3(read);
6681 D0 : S0; // big decoder only
6682 DECODE : S1(2); // any decoder for FPU POP
6683 FPU : S4;
6684 MEM : S3; // any mem
6685 %}
6686
6687 // Float mem-reg operation
6688 pipe_class fpu_mem_reg(memory mem, regD src)
6689 %{
6690 instruction_count(2);
6691 src : S5(read);
6692 mem : S3(read);
6693 DECODE : S0; // any decoder for FPU PUSH
6694 D0 : S1; // big decoder only
6695 FPU : S4;
6696 MEM : S3; // any mem
6697 %}
6698
6699 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6700 %{
6701 instruction_count(3);
6702 src1 : S3(read);
6703 src2 : S3(read);
6704 mem : S3(read);
6705 DECODE : S0(2); // any decoder for FPU PUSH
6706 D0 : S1; // big decoder only
6707 FPU : S4;
6708 MEM : S3; // any mem
6709 %}
6710
6711 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6712 %{
6713 instruction_count(3);
6714 src1 : S3(read);
6715 src2 : S3(read);
6716 mem : S4(read);
6717 DECODE : S0; // any decoder for FPU PUSH
6718 D0 : S0(2); // big decoder only
6719 FPU : S4;
6720 MEM : S3(2); // any mem
6721 %}
6722
6723 pipe_class fpu_mem_mem(memory dst, memory src1)
6724 %{
6725 instruction_count(2);
6726 src1 : S3(read);
6727 dst : S4(read);
6728 D0 : S0(2); // big decoder only
6729 MEM : S3(2); // any mem
6730 %}
6731
6732 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6733 %{
6734 instruction_count(3);
6735 src1 : S3(read);
6736 src2 : S3(read);
6737 dst : S4(read);
6738 D0 : S0(3); // big decoder only
6739 FPU : S4;
6740 MEM : S3(3); // any mem
6741 %}
6742
6743 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6744 %{
6745 instruction_count(3);
6746 src1 : S4(read);
6747 mem : S4(read);
6748 DECODE : S0; // any decoder for FPU PUSH
6749 D0 : S0(2); // big decoder only
6750 FPU : S4;
6751 MEM : S3(2); // any mem
6752 %}
6753
6754 // Float load constant
6755 pipe_class fpu_reg_con(regD dst)
6756 %{
6757 instruction_count(2);
6758 dst : S5(write);
6759 D0 : S0; // big decoder only for the load
6760 DECODE : S1; // any decoder for FPU POP
6761 FPU : S4;
6762 MEM : S3; // any mem
6763 %}
6764
6765 // Float load constant
6766 pipe_class fpu_reg_reg_con(regD dst, regD src)
6767 %{
6768 instruction_count(3);
6769 dst : S5(write);
6770 src : S3(read);
6771 D0 : S0; // big decoder only for the load
6772 DECODE : S1(2); // any decoder for FPU POP
6773 FPU : S4;
6774 MEM : S3; // any mem
6775 %}
6776
6777 // UnConditional branch
6778 pipe_class pipe_jmp(label labl)
6779 %{
6780 single_instruction;
6781 BR : S3;
6782 %}
6783
6784 // Conditional branch
6785 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6786 %{
6787 single_instruction;
6788 cr : S1(read);
6789 BR : S3;
6790 %}
6791
6792 // Allocation idiom
6793 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6794 %{
6795 instruction_count(1); force_serialization;
6796 fixed_latency(6);
6797 heap_ptr : S3(read);
6798 DECODE : S0(3);
6799 D0 : S2;
6800 MEM : S3;
6801 ALU : S3(2);
6802 dst : S5(write);
6803 BR : S5;
6804 %}
6805
6806 // Generic big/slow expanded idiom
6807 pipe_class pipe_slow()
6808 %{
6809 instruction_count(10); multiple_bundles; force_serialization;
6810 fixed_latency(100);
6811 D0 : S0(2);
6812 MEM : S3(2);
6813 %}
6814
6815 // The real do-nothing guy
6816 pipe_class empty()
6817 %{
6818 instruction_count(0);
6819 %}
6820
6821 // Define the class for the Nop node
6822 define
6823 %{
6824 MachNop = empty;
6825 %}
6826
6827 %}
6828
6829 //----------INSTRUCTIONS-------------------------------------------------------
6830 //
6831 // match -- States which machine-independent subtree may be replaced
6832 // by this instruction.
6833 // ins_cost -- The estimated cost of this instruction is used by instruction
6834 // selection to identify a minimum cost tree of machine
6835 // instructions that matches a tree of machine-independent
6836 // instructions.
6837 // format -- A string providing the disassembly for this instruction.
6838 // The value of an instruction's operand may be inserted
6839 // by referring to it with a '$' prefix.
6840 // opcode -- Three instruction opcodes may be provided. These are referred
6841 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6843 // indicate the type of machine instruction, while secondary
6844 // and tertiary are often used for prefix options or addressing
6845 // modes.
6846 // ins_encode -- A list of encode classes with parameters. The encode class
6847 // name must have been defined in an 'enc_class' specification
6848 // in the encode section of the architecture description.
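//
// As a sketch (illustrative only; the real definitions appear later in this
// file), a typical arithmetic instruct combines these pieces as follows:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));   // replaces the ideal AddI subtree
//     effect(KILL cr);                 // addl clobbers the condition flags
//     format %{ "addl $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}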
6849
6850 // ============================================================================
6851
6852 instruct ShouldNotReachHere() %{
6853 match(Halt);
6854 format %{ "stop\t# ShouldNotReachHere" %}
6855 ins_encode %{
6856 if (is_reachable()) {
6857 const char* str = __ code_string(_halt_reason);
6858 __ stop(str);
6859 }
6860 %}
6861 ins_pipe(pipe_slow);
6862 %}
6863
6864 // ============================================================================
6865
6866 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
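// Their ins_encode bodies call ShouldNotReachHere() on purpose: if one of
// these moves survives to code emission, the cleanup pass failed and we want
// a hard failure rather than silently emitting nothing.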
6867 // Load Float
6868 instruct MoveF2VL(vlRegF dst, regF src) %{
6869 match(Set dst src);
6870 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6871 ins_encode %{
6872 ShouldNotReachHere();
6873 %}
6874 ins_pipe( fpu_reg_reg );
6875 %}
6876
6877 // Load Float
6878 instruct MoveF2LEG(legRegF dst, regF src) %{
6879 match(Set dst src);
6880 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6881 ins_encode %{
6882 ShouldNotReachHere();
6883 %}
6884 ins_pipe( fpu_reg_reg );
6885 %}
6886
6887 // Load Float
6888 instruct MoveVL2F(regF dst, vlRegF src) %{
6889 match(Set dst src);
6890 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6891 ins_encode %{
6892 ShouldNotReachHere();
6893 %}
6894 ins_pipe( fpu_reg_reg );
6895 %}
6896
6897 // Load Float
6898 instruct MoveLEG2F(regF dst, legRegF src) %{
6899 match(Set dst src);
6900 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6901 ins_encode %{
6902 ShouldNotReachHere();
6903 %}
6904 ins_pipe( fpu_reg_reg );
6905 %}
6906
6907 // Load Double
6908 instruct MoveD2VL(vlRegD dst, regD src) %{
6909 match(Set dst src);
6910 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6911 ins_encode %{
6912 ShouldNotReachHere();
6913 %}
6914 ins_pipe( fpu_reg_reg );
6915 %}
6916
6917 // Load Double
6918 instruct MoveD2LEG(legRegD dst, regD src) %{
6919 match(Set dst src);
6920 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6921 ins_encode %{
6922 ShouldNotReachHere();
6923 %}
6924 ins_pipe( fpu_reg_reg );
6925 %}
6926
6927 // Load Double
6928 instruct MoveVL2D(regD dst, vlRegD src) %{
6929 match(Set dst src);
6930 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6931 ins_encode %{
6932 ShouldNotReachHere();
6933 %}
6934 ins_pipe( fpu_reg_reg );
6935 %}
6936
6937 // Load Double
6938 instruct MoveLEG2D(regD dst, legRegD src) %{
6939 match(Set dst src);
6940 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6941 ins_encode %{
6942 ShouldNotReachHere();
6943 %}
6944 ins_pipe( fpu_reg_reg );
6945 %}
6946
6947 //----------Load/Store/Move Instructions---------------------------------------
6948 //----------Load Instructions--------------------------------------------------
6949
6950 // Load Byte (8 bit signed)
6951 instruct loadB(rRegI dst, memory mem)
6952 %{
6953 match(Set dst (LoadB mem));
6954
6955 ins_cost(125);
6956 format %{ "movsbl $dst, $mem\t# byte" %}
6957
6958 ins_encode %{
6959 __ movsbl($dst$$Register, $mem$$Address);
6960 %}
6961
6962 ins_pipe(ialu_reg_mem);
6963 %}
6964
6965 // Load Byte (8 bit signed) into Long Register
6966 instruct loadB2L(rRegL dst, memory mem)
6967 %{
6968 match(Set dst (ConvI2L (LoadB mem)));
6969
6970 ins_cost(125);
6971 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6972
6973 ins_encode %{
6974 __ movsbq($dst$$Register, $mem$$Address);
6975 %}
6976
6977 ins_pipe(ialu_reg_mem);
6978 %}
6979
6980 // Load Unsigned Byte (8 bit UNsigned)
6981 instruct loadUB(rRegI dst, memory mem)
6982 %{
6983 match(Set dst (LoadUB mem));
6984
6985 ins_cost(125);
6986 format %{ "movzbl $dst, $mem\t# ubyte" %}
6987
6988 ins_encode %{
6989 __ movzbl($dst$$Register, $mem$$Address);
6990 %}
6991
6992 ins_pipe(ialu_reg_mem);
6993 %}
6994
6995 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6996 instruct loadUB2L(rRegL dst, memory mem)
6997 %{
6998 match(Set dst (ConvI2L (LoadUB mem)));
6999
7000 ins_cost(125);
7001 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
7002
7003 ins_encode %{
7004 __ movzbq($dst$$Register, $mem$$Address);
7005 %}
7006
7007 ins_pipe(ialu_reg_mem);
7008 %}
7009
7010 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
7011 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7012 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
7013 effect(KILL cr);
7014
7015 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
7016 "andl $dst, right_n_bits($mask, 8)" %}
7017 ins_encode %{
7018 Register Rdst = $dst$$Register;
7019 __ movzbq(Rdst, $mem$$Address);
7020 __ andl(Rdst, $mask$$constant & right_n_bits(8));
7021 %}
7022 ins_pipe(ialu_reg_mem);
7023 %}
7024
7025 // Load Short (16 bit signed)
7026 instruct loadS(rRegI dst, memory mem)
7027 %{
7028 match(Set dst (LoadS mem));
7029
7030 ins_cost(125);
7031 format %{ "movswl $dst, $mem\t# short" %}
7032
7033 ins_encode %{
7034 __ movswl($dst$$Register, $mem$$Address);
7035 %}
7036
7037 ins_pipe(ialu_reg_mem);
7038 %}
7039
7040 // Load Short (16 bit signed) to Byte (8 bit signed)
7041 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7042 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
7043
7044 ins_cost(125);
7045 format %{ "movsbl $dst, $mem\t# short -> byte" %}
7046 ins_encode %{
7047 __ movsbl($dst$$Register, $mem$$Address);
7048 %}
7049 ins_pipe(ialu_reg_mem);
7050 %}
7051
7052 // Load Short (16 bit signed) into Long Register
7053 instruct loadS2L(rRegL dst, memory mem)
7054 %{
7055 match(Set dst (ConvI2L (LoadS mem)));
7056
7057 ins_cost(125);
7058 format %{ "movswq $dst, $mem\t# short -> long" %}
7059
7060 ins_encode %{
7061 __ movswq($dst$$Register, $mem$$Address);
7062 %}
7063
7064 ins_pipe(ialu_reg_mem);
7065 %}
7066
7067 // Load Unsigned Short/Char (16 bit UNsigned)
7068 instruct loadUS(rRegI dst, memory mem)
7069 %{
7070 match(Set dst (LoadUS mem));
7071
7072 ins_cost(125);
7073 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7074
7075 ins_encode %{
7076 __ movzwl($dst$$Register, $mem$$Address);
7077 %}
7078
7079 ins_pipe(ialu_reg_mem);
7080 %}
7081
7082 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7083 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7084 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7085
7086 ins_cost(125);
7087 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7088 ins_encode %{
7089 __ movsbl($dst$$Register, $mem$$Address);
7090 %}
7091 ins_pipe(ialu_reg_mem);
7092 %}
7093
7094 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7095 instruct loadUS2L(rRegL dst, memory mem)
7096 %{
7097 match(Set dst (ConvI2L (LoadUS mem)));
7098
7099 ins_cost(125);
7100 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7101
7102 ins_encode %{
7103 __ movzwq($dst$$Register, $mem$$Address);
7104 %}
7105
7106 ins_pipe(ialu_reg_mem);
7107 %}
7108
7109 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7110 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7111 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7112
7113 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7114 ins_encode %{
7115 __ movzbq($dst$$Register, $mem$$Address);
7116 %}
7117 ins_pipe(ialu_reg_mem);
7118 %}
7119
7120 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7121 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7122 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7123 effect(KILL cr);
7124
7125 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7126 "andl $dst, right_n_bits($mask, 16)" %}
7127 ins_encode %{
7128 Register Rdst = $dst$$Register;
7129 __ movzwq(Rdst, $mem$$Address);
7130 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7131 %}
7132 ins_pipe(ialu_reg_mem);
7133 %}
7134
7135 // Load Integer
7136 instruct loadI(rRegI dst, memory mem)
7137 %{
7138 match(Set dst (LoadI mem));
7139
7140 ins_cost(125);
7141 format %{ "movl $dst, $mem\t# int" %}
7142
7143 ins_encode %{
7144 __ movl($dst$$Register, $mem$$Address);
7145 %}
7146
7147 ins_pipe(ialu_reg_mem);
7148 %}
7149
7150 // Load Integer (32 bit signed) to Byte (8 bit signed)
7151 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7152 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7153
7154 ins_cost(125);
7155 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7156 ins_encode %{
7157 __ movsbl($dst$$Register, $mem$$Address);
7158 %}
7159 ins_pipe(ialu_reg_mem);
7160 %}
7161
7162 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7163 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7164 match(Set dst (AndI (LoadI mem) mask));
7165
7166 ins_cost(125);
7167 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7168 ins_encode %{
7169 __ movzbl($dst$$Register, $mem$$Address);
7170 %}
7171 ins_pipe(ialu_reg_mem);
7172 %}
7173
7174 // Load Integer (32 bit signed) to Short (16 bit signed)
7175 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7176 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7177
7178 ins_cost(125);
7179 format %{ "movswl $dst, $mem\t# int -> short" %}
7180 ins_encode %{
7181 __ movswl($dst$$Register, $mem$$Address);
7182 %}
7183 ins_pipe(ialu_reg_mem);
7184 %}
7185
7186 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7187 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7188 match(Set dst (AndI (LoadI mem) mask));
7189
7190 ins_cost(125);
7191 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7192 ins_encode %{
7193 __ movzwl($dst$$Register, $mem$$Address);
7194 %}
7195 ins_pipe(ialu_reg_mem);
7196 %}
7197
7198 // Load Integer into Long Register
7199 instruct loadI2L(rRegL dst, memory mem)
7200 %{
7201 match(Set dst (ConvI2L (LoadI mem)));
7202
7203 ins_cost(125);
7204 format %{ "movslq $dst, $mem\t# int -> long" %}
7205
7206 ins_encode %{
7207 __ movslq($dst$$Register, $mem$$Address);
7208 %}
7209
7210 ins_pipe(ialu_reg_mem);
7211 %}
7212
7213 // Load Integer with mask 0xFF into Long Register
7214 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7215 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7216
7217 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7218 ins_encode %{
7219 __ movzbq($dst$$Register, $mem$$Address);
7220 %}
7221 ins_pipe(ialu_reg_mem);
7222 %}
7223
7224 // Load Integer with mask 0xFFFF into Long Register
7225 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7226 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7227
7228 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7229 ins_encode %{
7230 __ movzwq($dst$$Register, $mem$$Address);
7231 %}
7232 ins_pipe(ialu_reg_mem);
7233 %}
7234
7235 // Load Integer with a 31-bit mask into Long Register
7236 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7237 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7238 effect(KILL cr);
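  // The mask is immU31 (non-negative), so bit 31 of the result is clear and
  // andl's implicit zero-extension to 64 bits coincides with the sign
  // extension that ConvI2L requires.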
7239
7240 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7241 "andl $dst, $mask" %}
7242 ins_encode %{
7243 Register Rdst = $dst$$Register;
7244 __ movl(Rdst, $mem$$Address);
7245 __ andl(Rdst, $mask$$constant);
7246 %}
7247 ins_pipe(ialu_reg_mem);
7248 %}
7249
7250 // Load Unsigned Integer into Long Register
7251 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7252 %{
7253 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7254
7255 ins_cost(125);
7256 format %{ "movl $dst, $mem\t# uint -> long" %}
7257
7258 ins_encode %{
7259 __ movl($dst$$Register, $mem$$Address);
7260 %}
7261
7262 ins_pipe(ialu_reg_mem);
7263 %}
7264
7265 // Load Long
7266 instruct loadL(rRegL dst, memory mem)
7267 %{
7268 match(Set dst (LoadL mem));
7269
7270 ins_cost(125);
7271 format %{ "movq $dst, $mem\t# long" %}
7272
7273 ins_encode %{
7274 __ movq($dst$$Register, $mem$$Address);
7275 %}
7276
7277 ins_pipe(ialu_reg_mem); // XXX
7278 %}
7279
7280 // Load Range
7281 instruct loadRange(rRegI dst, memory mem)
7282 %{
7283 match(Set dst (LoadRange mem));
7284
7285 ins_cost(125); // XXX
7286 format %{ "movl $dst, $mem\t# range" %}
7287 ins_encode %{
7288 __ movl($dst$$Register, $mem$$Address);
7289 %}
7290 ins_pipe(ialu_reg_mem);
7291 %}
7292
7293 // Load Pointer
7294 instruct loadP(rRegP dst, memory mem)
7295 %{
7296 match(Set dst (LoadP mem));
7297 predicate(n->as_Load()->barrier_data() == 0);
7298
7299 ins_cost(125); // XXX
7300 format %{ "movq $dst, $mem\t# ptr" %}
7301 ins_encode %{
7302 __ movq($dst$$Register, $mem$$Address);
7303 %}
7304 ins_pipe(ialu_reg_mem); // XXX
7305 %}
7306
7307 // Load Compressed Pointer
7308 instruct loadN(rRegN dst, memory mem)
7309 %{
7310 predicate(n->as_Load()->barrier_data() == 0);
7311 match(Set dst (LoadN mem));
7312
7313 ins_cost(125); // XXX
7314 format %{ "movl $dst, $mem\t# compressed ptr" %}
7315 ins_encode %{
7316 __ movl($dst$$Register, $mem$$Address);
7317 %}
7318 ins_pipe(ialu_reg_mem); // XXX
7319 %}
7320
7321
7322 // Load Klass Pointer
7323 instruct loadKlass(rRegP dst, memory mem)
7324 %{
7325 match(Set dst (LoadKlass mem));
7326
7327 ins_cost(125); // XXX
7328 format %{ "movq $dst, $mem\t# class" %}
7329 ins_encode %{
7330 __ movq($dst$$Register, $mem$$Address);
7331 %}
7332 ins_pipe(ialu_reg_mem); // XXX
7333 %}
7334
7335 // Load narrow Klass Pointer
7336 instruct loadNKlass(rRegN dst, memory mem)
7337 %{
7338 predicate(!UseCompactObjectHeaders);
7339 match(Set dst (LoadNKlass mem));
7340
7341 ins_cost(125); // XXX
7342 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7343 ins_encode %{
7344 __ movl($dst$$Register, $mem$$Address);
7345 %}
7346 ins_pipe(ialu_reg_mem); // XXX
7347 %}
7348
7349 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7350 %{
7351 predicate(UseCompactObjectHeaders);
7352 match(Set dst (LoadNKlass mem));
7353 effect(KILL cr);
7354 ins_cost(125);
7355 format %{
7356 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7357 "shrl $dst, markWord::klass_shift_at_offset"
7358 %}
7359 ins_encode %{
7360 if (UseAPX) {
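      // APX NDD (new data destination) form: load and shift in a single
      // instruction instead of the mov + shrl pair used below.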
7361 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7362 }
7363 else {
7364 __ movl($dst$$Register, $mem$$Address);
7365 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7366 }
7367 %}
7368 ins_pipe(ialu_reg_mem);
7369 %}
7370
7371 // Load Float
7372 instruct loadF(regF dst, memory mem)
7373 %{
7374 match(Set dst (LoadF mem));
7375
7376 ins_cost(145); // XXX
7377 format %{ "movss $dst, $mem\t# float" %}
7378 ins_encode %{
7379 __ movflt($dst$$XMMRegister, $mem$$Address);
7380 %}
7381 ins_pipe(pipe_slow); // XXX
7382 %}
7383
7384 // Load Double
7385 instruct loadD_partial(regD dst, memory mem)
7386 %{
7387 predicate(!UseXmmLoadAndClearUpper);
7388 match(Set dst (LoadD mem));
7389
7390 ins_cost(145); // XXX
7391 format %{ "movlpd $dst, $mem\t# double" %}
7392 ins_encode %{
7393 __ movdbl($dst$$XMMRegister, $mem$$Address);
7394 %}
7395 ins_pipe(pipe_slow); // XXX
7396 %}
7397
7398 instruct loadD(regD dst, memory mem)
7399 %{
7400 predicate(UseXmmLoadAndClearUpper);
7401 match(Set dst (LoadD mem));
7402
7403 ins_cost(145); // XXX
7404 format %{ "movsd $dst, $mem\t# double" %}
7405 ins_encode %{
7406 __ movdbl($dst$$XMMRegister, $mem$$Address);
7407 %}
7408 ins_pipe(pipe_slow); // XXX
7409 %}
7410
7411 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7412 %{
7413 match(Set dst con);
7414
7415 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7416
7417 ins_encode %{
7418 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7419 %}
7420
7421 ins_pipe(ialu_reg_fat);
7422 %}
7423
7424 // min = java.lang.Math.min(float a, float b)
7425 // max = java.lang.Math.max(float a, float b)
7426 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
7427 %{
7428 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7429 match(Set dst (MaxF a b));
7430 match(Set dst (MinF a b));
7431
7432 format %{ "minmaxF $dst, $a, $b" %}
7433 ins_encode %{
7434 int opcode = this->ideal_Opcode();
7435 __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7436 %}
7437 ins_pipe( pipe_slow );
7438 %}
7439
7440 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
7441 %{
7442 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7443 match(Set dst (MaxF a b));
7444 match(Set dst (MinF a b));
7445 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7446
7447 format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7448 ins_encode %{
7449 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
7451 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7452 min, fp_prec_flt /*pt*/);
7453 %}
7454 ins_pipe( pipe_slow );
7455 %}
7456
7457 // min = java.lang.Math.min(float a, float b)
7458 // max = java.lang.Math.max(float a, float b)
7459 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
7460 %{
7461 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7462 match(Set dst (MaxF a b));
7463 match(Set dst (MinF a b));
7464 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7465
7466 format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7467 ins_encode %{
7468 int opcode = this->ideal_Opcode();
7469 int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
7470 __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7471 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7472 %}
7473 ins_pipe( pipe_slow );
7474 %}
7475
7476 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
7477 %{
7478 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7479 match(Set dst (MaxF a b));
7480 match(Set dst (MinF a b));
7481 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7482
7483 format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7484 ins_encode %{
7485 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinF);
7487 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7488 min, fp_prec_flt /*pt*/);
7489 %}
7490 ins_pipe( pipe_slow );
7491 %}
7492
7493 // min = java.lang.Math.min(double a, double b)
7494 // max = java.lang.Math.max(double a, double b)
7495 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
7496 %{
7497 predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
7498 match(Set dst (MaxD a b));
7499 match(Set dst (MinD a b));
7500
7501 format %{ "minmaxD $dst, $a, $b" %}
7502 ins_encode %{
7503 int opcode = this->ideal_Opcode();
7504 __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
7505 %}
7506 ins_pipe( pipe_slow );
7507 %}
7508
7509 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
7510 %{
7511 predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
7512 match(Set dst (MaxD a b));
7513 match(Set dst (MinD a b));
7514 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7515
7516 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7517 ins_encode %{
7518 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
7520 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7521 min, fp_prec_dbl /*pt*/);
7522 %}
7523 ins_pipe( pipe_slow );
7524 %}
7525
7526 // min = java.lang.Math.min(double a, double b)
7527 // max = java.lang.Math.max(double a, double b)
7528 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
7529 %{
7530 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7531 match(Set dst (MaxD a b));
7532 match(Set dst (MinD a b));
7533 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7534
7535 format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7536 ins_encode %{
7537 int opcode = this->ideal_Opcode();
7538 int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
7539 __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
7540 $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7541 %}
7542 ins_pipe( pipe_slow );
7543 %}
7544
7545 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
7546 %{
7547 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7548 match(Set dst (MaxD a b));
7549 match(Set dst (MinD a b));
7550 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7551
7552 format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7553 ins_encode %{
7554 int opcode = this->ideal_Opcode();
    bool min = (opcode == Op_MinD);
7556 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7557 min, fp_prec_dbl /*pt*/);
7558 %}
7559 ins_pipe( pipe_slow );
7560 %}
7561
7562 // Load Effective Address
7563 instruct leaP8(rRegP dst, indOffset8 mem)
7564 %{
7565 match(Set dst mem);
7566
7567 ins_cost(110); // XXX
7568 format %{ "leaq $dst, $mem\t# ptr 8" %}
7569 ins_encode %{
7570 __ leaq($dst$$Register, $mem$$Address);
7571 %}
7572 ins_pipe(ialu_reg_reg_fat);
7573 %}
7574
7575 instruct leaP32(rRegP dst, indOffset32 mem)
7576 %{
7577 match(Set dst mem);
7578
7579 ins_cost(110);
7580 format %{ "leaq $dst, $mem\t# ptr 32" %}
7581 ins_encode %{
7582 __ leaq($dst$$Register, $mem$$Address);
7583 %}
7584 ins_pipe(ialu_reg_reg_fat);
7585 %}
7586
7587 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7588 %{
7589 match(Set dst mem);
7590
7591 ins_cost(110);
7592 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7593 ins_encode %{
7594 __ leaq($dst$$Register, $mem$$Address);
7595 %}
7596 ins_pipe(ialu_reg_reg_fat);
7597 %}
7598
7599 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7600 %{
7601 match(Set dst mem);
7602
7603 ins_cost(110);
7604 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7605 ins_encode %{
7606 __ leaq($dst$$Register, $mem$$Address);
7607 %}
7608 ins_pipe(ialu_reg_reg_fat);
7609 %}
7610
7611 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7612 %{
7613 match(Set dst mem);
7614
7615 ins_cost(110);
7616 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7617 ins_encode %{
7618 __ leaq($dst$$Register, $mem$$Address);
7619 %}
7620 ins_pipe(ialu_reg_reg_fat);
7621 %}
7622
7623 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7624 %{
7625 match(Set dst mem);
7626
7627 ins_cost(110);
7628 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7629 ins_encode %{
7630 __ leaq($dst$$Register, $mem$$Address);
7631 %}
7632 ins_pipe(ialu_reg_reg_fat);
7633 %}
7634
7635 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7636 %{
7637 match(Set dst mem);
7638
7639 ins_cost(110);
7640 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7641 ins_encode %{
7642 __ leaq($dst$$Register, $mem$$Address);
7643 %}
7644 ins_pipe(ialu_reg_reg_fat);
7645 %}
7646
7647 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7648 %{
7649 match(Set dst mem);
7650
7651 ins_cost(110);
7652 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7653 ins_encode %{
7654 __ leaq($dst$$Register, $mem$$Address);
7655 %}
7656 ins_pipe(ialu_reg_reg_fat);
7657 %}
7658
7659 // Load Effective Address which uses Narrow (32-bits) oop
7660 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7661 %{
7662 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7663 match(Set dst mem);
7664
7665 ins_cost(110);
7666 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7667 ins_encode %{
7668 __ leaq($dst$$Register, $mem$$Address);
7669 %}
7670 ins_pipe(ialu_reg_reg_fat);
7671 %}
7672
7673 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7674 %{
7675 predicate(CompressedOops::shift() == 0);
7676 match(Set dst mem);
7677
7678 ins_cost(110); // XXX
7679 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7680 ins_encode %{
7681 __ leaq($dst$$Register, $mem$$Address);
7682 %}
7683 ins_pipe(ialu_reg_reg_fat);
7684 %}
7685
7686 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7687 %{
7688 predicate(CompressedOops::shift() == 0);
7689 match(Set dst mem);
7690
7691 ins_cost(110);
7692 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7693 ins_encode %{
7694 __ leaq($dst$$Register, $mem$$Address);
7695 %}
7696 ins_pipe(ialu_reg_reg_fat);
7697 %}
7698
7699 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7700 %{
7701 predicate(CompressedOops::shift() == 0);
7702 match(Set dst mem);
7703
7704 ins_cost(110);
7705 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7706 ins_encode %{
7707 __ leaq($dst$$Register, $mem$$Address);
7708 %}
7709 ins_pipe(ialu_reg_reg_fat);
7710 %}
7711
7712 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7713 %{
7714 predicate(CompressedOops::shift() == 0);
7715 match(Set dst mem);
7716
7717 ins_cost(110);
7718 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7719 ins_encode %{
7720 __ leaq($dst$$Register, $mem$$Address);
7721 %}
7722 ins_pipe(ialu_reg_reg_fat);
7723 %}
7724
7725 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7726 %{
7727 predicate(CompressedOops::shift() == 0);
7728 match(Set dst mem);
7729
7730 ins_cost(110);
7731 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7732 ins_encode %{
7733 __ leaq($dst$$Register, $mem$$Address);
7734 %}
7735 ins_pipe(ialu_reg_reg_fat);
7736 %}
7737
7738 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7739 %{
7740 predicate(CompressedOops::shift() == 0);
7741 match(Set dst mem);
7742
7743 ins_cost(110);
7744 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7745 ins_encode %{
7746 __ leaq($dst$$Register, $mem$$Address);
7747 %}
7748 ins_pipe(ialu_reg_reg_fat);
7749 %}
7750
7751 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7752 %{
7753 predicate(CompressedOops::shift() == 0);
7754 match(Set dst mem);
7755
7756 ins_cost(110);
7757 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7758 ins_encode %{
7759 __ leaq($dst$$Register, $mem$$Address);
7760 %}
7761 ins_pipe(ialu_reg_reg_fat);
7762 %}
7763
7764 instruct loadConI(rRegI dst, immI src)
7765 %{
7766 match(Set dst src);
7767
7768 format %{ "movl $dst, $src\t# int" %}
7769 ins_encode %{
7770 __ movl($dst$$Register, $src$$constant);
7771 %}
7772 ins_pipe(ialu_reg_fat); // XXX
7773 %}
7774
7775 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7776 %{
7777 match(Set dst src);
7778 effect(KILL cr);
7779
7780 ins_cost(50);
7781 format %{ "xorl $dst, $dst\t# int" %}
7782 ins_encode %{
7783 __ xorl($dst$$Register, $dst$$Register);
7784 %}
7785 ins_pipe(ialu_reg);
7786 %}
7787
7788 instruct loadConL(rRegL dst, immL src)
7789 %{
7790 match(Set dst src);
7791
7792 ins_cost(150);
7793 format %{ "movq $dst, $src\t# long" %}
7794 ins_encode %{
7795 __ mov64($dst$$Register, $src$$constant);
7796 %}
7797 ins_pipe(ialu_reg);
7798 %}
7799
7800 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7801 %{
7802 match(Set dst src);
7803 effect(KILL cr);
7804
7805 ins_cost(50);
7806 format %{ "xorl $dst, $dst\t# long" %}
7807 ins_encode %{
7808 __ xorl($dst$$Register, $dst$$Register);
7809 %}
7810 ins_pipe(ialu_reg); // XXX
7811 %}
7812
7813 instruct loadConUL32(rRegL dst, immUL32 src)
7814 %{
7815 match(Set dst src);
7816
7817 ins_cost(60);
7818 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7819 ins_encode %{
7820 __ movl($dst$$Register, $src$$constant);
7821 %}
7822 ins_pipe(ialu_reg);
7823 %}
7824
7825 instruct loadConL32(rRegL dst, immL32 src)
7826 %{
7827 match(Set dst src);
7828
7829 ins_cost(70);
7830 format %{ "movq $dst, $src\t# long (32-bit)" %}
7831 ins_encode %{
7832 __ movq($dst$$Register, $src$$constant);
7833 %}
7834 ins_pipe(ialu_reg);
7835 %}
7836
7837 instruct loadConP(rRegP dst, immP con) %{
7838 match(Set dst con);
7839
7840 format %{ "movq $dst, $con\t# ptr" %}
7841 ins_encode %{
7842 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7843 %}
7844 ins_pipe(ialu_reg_fat); // XXX
7845 %}
7846
7847 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7848 %{
7849 match(Set dst src);
7850 effect(KILL cr);
7851
7852 ins_cost(50);
7853 format %{ "xorl $dst, $dst\t# ptr" %}
7854 ins_encode %{
7855 __ xorl($dst$$Register, $dst$$Register);
7856 %}
7857 ins_pipe(ialu_reg);
7858 %}
7859
7860 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7861 %{
7862 match(Set dst src);
7863 effect(KILL cr);
7864
7865 ins_cost(60);
7866 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7867 ins_encode %{
7868 __ movl($dst$$Register, $src$$constant);
7869 %}
7870 ins_pipe(ialu_reg);
7871 %}
7872
7873 instruct loadConF(regF dst, immF con) %{
7874 match(Set dst con);
7875 ins_cost(125);
7876 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7877 ins_encode %{
7878 __ movflt($dst$$XMMRegister, $constantaddress($con));
7879 %}
7880 ins_pipe(pipe_slow);
7881 %}
7882
7883 instruct loadConH(regF dst, immH con) %{
7884 match(Set dst con);
7885 ins_cost(125);
7886 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7887 ins_encode %{
7888 __ movflt($dst$$XMMRegister, $constantaddress($con));
7889 %}
7890 ins_pipe(pipe_slow);
7891 %}
7892
7893 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7894 match(Set dst src);
7895 effect(KILL cr);
7896 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7897 ins_encode %{
7898 __ xorq($dst$$Register, $dst$$Register);
7899 %}
7900 ins_pipe(ialu_reg);
7901 %}
7902
7903 instruct loadConN(rRegN dst, immN src) %{
7904 match(Set dst src);
7905
7906 ins_cost(125);
7907 format %{ "movl $dst, $src\t# compressed ptr" %}
7908 ins_encode %{
7909 address con = (address)$src$$constant;
7910 if (con == nullptr) {
7911 ShouldNotReachHere();
7912 } else {
7913 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7914 }
7915 %}
7916 ins_pipe(ialu_reg_fat); // XXX
7917 %}
7918
7919 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7920 match(Set dst src);
7921
7922 ins_cost(125);
7923 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7924 ins_encode %{
7925 address con = (address)$src$$constant;
7926 if (con == nullptr) {
7927 ShouldNotReachHere();
7928 } else {
7929 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7930 }
7931 %}
7932 ins_pipe(ialu_reg_fat); // XXX
7933 %}
7934
7935 instruct loadConF0(regF dst, immF0 src)
7936 %{
7937 match(Set dst src);
7938 ins_cost(100);
7939
7940 format %{ "xorps $dst, $dst\t# float 0.0" %}
7941 ins_encode %{
7942 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7943 %}
7944 ins_pipe(pipe_slow);
7945 %}
7946
// Use the same format since predicate() cannot be used here.
7948 instruct loadConD(regD dst, immD con) %{
7949 match(Set dst con);
7950 ins_cost(125);
7951 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7952 ins_encode %{
7953 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7954 %}
7955 ins_pipe(pipe_slow);
7956 %}
7957
7958 instruct loadConD0(regD dst, immD0 src)
7959 %{
7960 match(Set dst src);
7961 ins_cost(100);
7962
7963 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7964 ins_encode %{
7965 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7966 %}
7967 ins_pipe(pipe_slow);
7968 %}
7969
7970 instruct loadSSI(rRegI dst, stackSlotI src)
7971 %{
7972 match(Set dst src);
7973
7974 ins_cost(125);
7975 format %{ "movl $dst, $src\t# int stk" %}
7976 ins_encode %{
7977 __ movl($dst$$Register, $src$$Address);
7978 %}
7979 ins_pipe(ialu_reg_mem);
7980 %}
7981
7982 instruct loadSSL(rRegL dst, stackSlotL src)
7983 %{
7984 match(Set dst src);
7985
7986 ins_cost(125);
7987 format %{ "movq $dst, $src\t# long stk" %}
7988 ins_encode %{
7989 __ movq($dst$$Register, $src$$Address);
7990 %}
7991 ins_pipe(ialu_reg_mem);
7992 %}
7993
7994 instruct loadSSP(rRegP dst, stackSlotP src)
7995 %{
7996 match(Set dst src);
7997
7998 ins_cost(125);
7999 format %{ "movq $dst, $src\t# ptr stk" %}
8000 ins_encode %{
8001 __ movq($dst$$Register, $src$$Address);
8002 %}
8003 ins_pipe(ialu_reg_mem);
8004 %}
8005
8006 instruct loadSSF(regF dst, stackSlotF src)
8007 %{
8008 match(Set dst src);
8009
8010 ins_cost(125);
8011 format %{ "movss $dst, $src\t# float stk" %}
8012 ins_encode %{
8013 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
8014 %}
8015 ins_pipe(pipe_slow); // XXX
8016 %}
8017
// Use the same format since predicate() cannot be used here.
8019 instruct loadSSD(regD dst, stackSlotD src)
8020 %{
8021 match(Set dst src);
8022
8023 ins_cost(125);
8024 format %{ "movsd $dst, $src\t# double stk" %}
8025 ins_encode %{
8026 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
8027 %}
8028 ins_pipe(pipe_slow); // XXX
8029 %}
8030
8031 // Prefetch instructions for allocation.
8032 // Must be safe to execute with invalid address (cannot fault).
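// AllocatePrefetchInstr selects the flavor (see the predicates below):
// 0 = prefetchnta, 1 = prefetcht0, 2 = prefetcht2, 3 = prefetchw.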
8033
8034 instruct prefetchAlloc( memory mem ) %{
8035 predicate(AllocatePrefetchInstr==3);
8036 match(PrefetchAllocation mem);
8037 ins_cost(125);
8038
8039 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
8040 ins_encode %{
8041 __ prefetchw($mem$$Address);
8042 %}
8043 ins_pipe(ialu_mem);
8044 %}
8045
8046 instruct prefetchAllocNTA( memory mem ) %{
8047 predicate(AllocatePrefetchInstr==0);
8048 match(PrefetchAllocation mem);
8049 ins_cost(125);
8050
8051 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
8052 ins_encode %{
8053 __ prefetchnta($mem$$Address);
8054 %}
8055 ins_pipe(ialu_mem);
8056 %}
8057
8058 instruct prefetchAllocT0( memory mem ) %{
8059 predicate(AllocatePrefetchInstr==1);
8060 match(PrefetchAllocation mem);
8061 ins_cost(125);
8062
8063 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8064 ins_encode %{
8065 __ prefetcht0($mem$$Address);
8066 %}
8067 ins_pipe(ialu_mem);
8068 %}
8069
8070 instruct prefetchAllocT2( memory mem ) %{
8071 predicate(AllocatePrefetchInstr==2);
8072 match(PrefetchAllocation mem);
8073 ins_cost(125);
8074
8075 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8076 ins_encode %{
8077 __ prefetcht2($mem$$Address);
8078 %}
8079 ins_pipe(ialu_mem);
8080 %}
8081
8082 //----------Store Instructions-------------------------------------------------
8083
8084 // Store Byte
8085 instruct storeB(memory mem, rRegI src)
8086 %{
8087 match(Set mem (StoreB mem src));
8088
8089 ins_cost(125); // XXX
8090 format %{ "movb $mem, $src\t# byte" %}
8091 ins_encode %{
8092 __ movb($mem$$Address, $src$$Register);
8093 %}
8094 ins_pipe(ialu_mem_reg);
8095 %}
8096
8097 // Store Char/Short
8098 instruct storeC(memory mem, rRegI src)
8099 %{
8100 match(Set mem (StoreC mem src));
8101
8102 ins_cost(125); // XXX
8103 format %{ "movw $mem, $src\t# char/short" %}
8104 ins_encode %{
8105 __ movw($mem$$Address, $src$$Register);
8106 %}
8107 ins_pipe(ialu_mem_reg);
8108 %}
8109
8110 // Store Integer
8111 instruct storeI(memory mem, rRegI src)
8112 %{
8113 match(Set mem (StoreI mem src));
8114
8115 ins_cost(125); // XXX
8116 format %{ "movl $mem, $src\t# int" %}
8117 ins_encode %{
8118 __ movl($mem$$Address, $src$$Register);
8119 %}
8120 ins_pipe(ialu_mem_reg);
8121 %}
8122
8123 // Store Long
8124 instruct storeL(memory mem, rRegL src)
8125 %{
8126 match(Set mem (StoreL mem src));
8127
8128 ins_cost(125); // XXX
8129 format %{ "movq $mem, $src\t# long" %}
8130 ins_encode %{
8131 __ movq($mem$$Address, $src$$Register);
8132 %}
8133 ins_pipe(ialu_mem_reg); // XXX
8134 %}
8135
8136 // Store Pointer
8137 instruct storeP(memory mem, any_RegP src)
8138 %{
8139 predicate(n->as_Store()->barrier_data() == 0);
8140 match(Set mem (StoreP mem src));
8141
8142 ins_cost(125); // XXX
8143 format %{ "movq $mem, $src\t# ptr" %}
8144 ins_encode %{
8145 __ movq($mem$$Address, $src$$Register);
8146 %}
8147 ins_pipe(ialu_mem_reg);
8148 %}
8149
8150 instruct storeImmP0(memory mem, immP0 zero)
8151 %{
8152 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8153 match(Set mem (StoreP mem zero));
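  // With a null compressed-oop base, r12 (the heap-base register) always
  // holds zero, so storing r12 beats the longer store-immediate encoding.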
8154
8155 ins_cost(125); // XXX
8156 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8157 ins_encode %{
8158 __ movq($mem$$Address, r12);
8159 %}
8160 ins_pipe(ialu_mem_reg);
8161 %}
8162
8163 // Store Null Pointer, mark word, or other simple pointer constant.
8164 instruct storeImmP(memory mem, immP31 src)
8165 %{
8166 predicate(n->as_Store()->barrier_data() == 0);
8167 match(Set mem (StoreP mem src));
8168
8169 ins_cost(150); // XXX
8170 format %{ "movq $mem, $src\t# ptr" %}
8171 ins_encode %{
8172 __ movq($mem$$Address, $src$$constant);
8173 %}
8174 ins_pipe(ialu_mem_imm);
8175 %}
8176
8177 // Store Compressed Pointer
8178 instruct storeN(memory mem, rRegN src)
8179 %{
8180 predicate(n->as_Store()->barrier_data() == 0);
8181 match(Set mem (StoreN mem src));
8182
8183 ins_cost(125); // XXX
8184 format %{ "movl $mem, $src\t# compressed ptr" %}
8185 ins_encode %{
8186 __ movl($mem$$Address, $src$$Register);
8187 %}
8188 ins_pipe(ialu_mem_reg);
8189 %}
8190
8191 instruct storeNKlass(memory mem, rRegN src)
8192 %{
8193 match(Set mem (StoreNKlass mem src));
8194
8195 ins_cost(125); // XXX
8196 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8197 ins_encode %{
8198 __ movl($mem$$Address, $src$$Register);
8199 %}
8200 ins_pipe(ialu_mem_reg);
8201 %}
8202
8203 instruct storeImmN0(memory mem, immN0 zero)
8204 %{
8205 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8206 match(Set mem (StoreN mem zero));
8207
8208 ins_cost(125); // XXX
8209 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8210 ins_encode %{
8211 __ movl($mem$$Address, r12);
8212 %}
8213 ins_pipe(ialu_mem_reg);
8214 %}
8215
8216 instruct storeImmN(memory mem, immN src)
8217 %{
8218 predicate(n->as_Store()->barrier_data() == 0);
8219 match(Set mem (StoreN mem src));
8220
8221 ins_cost(150); // XXX
8222 format %{ "movl $mem, $src\t# compressed ptr" %}
8223 ins_encode %{
8224 address con = (address)$src$$constant;
8225 if (con == nullptr) {
8226 __ movl($mem$$Address, 0);
8227 } else {
8228 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8229 }
8230 %}
8231 ins_pipe(ialu_mem_imm);
8232 %}
8233
8234 instruct storeImmNKlass(memory mem, immNKlass src)
8235 %{
8236 match(Set mem (StoreNKlass mem src));
8237
8238 ins_cost(150); // XXX
8239 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8240 ins_encode %{
8241 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8242 %}
8243 ins_pipe(ialu_mem_imm);
8244 %}
8245
8246 // Store Integer Immediate
8247 instruct storeImmI0(memory mem, immI_0 zero)
8248 %{
8249 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8250 match(Set mem (StoreI mem zero));
8251
8252 ins_cost(125); // XXX
8253 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8254 ins_encode %{
8255 __ movl($mem$$Address, r12);
8256 %}
8257 ins_pipe(ialu_mem_reg);
8258 %}
8259
8260 instruct storeImmI(memory mem, immI src)
8261 %{
8262 match(Set mem (StoreI mem src));
8263
8264 ins_cost(150);
8265 format %{ "movl $mem, $src\t# int" %}
8266 ins_encode %{
8267 __ movl($mem$$Address, $src$$constant);
8268 %}
8269 ins_pipe(ialu_mem_imm);
8270 %}
8271
8272 // Store Long Immediate
8273 instruct storeImmL0(memory mem, immL0 zero)
8274 %{
8275 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8276 match(Set mem (StoreL mem zero));
8277
8278 ins_cost(125); // XXX
8279 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8280 ins_encode %{
8281 __ movq($mem$$Address, r12);
8282 %}
8283 ins_pipe(ialu_mem_reg);
8284 %}
8285
8286 instruct storeImmL(memory mem, immL32 src)
8287 %{
8288 match(Set mem (StoreL mem src));
8289
8290 ins_cost(150);
8291 format %{ "movq $mem, $src\t# long" %}
8292 ins_encode %{
8293 __ movq($mem$$Address, $src$$constant);
8294 %}
8295 ins_pipe(ialu_mem_imm);
8296 %}
8297
8298 // Store Short/Char Immediate
8299 instruct storeImmC0(memory mem, immI_0 zero)
8300 %{
8301 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8302 match(Set mem (StoreC mem zero));
8303
8304 ins_cost(125); // XXX
8305 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8306 ins_encode %{
8307 __ movw($mem$$Address, r12);
8308 %}
8309 ins_pipe(ialu_mem_reg);
8310 %}
8311
8312 instruct storeImmI16(memory mem, immI16 src)
8313 %{
8314 predicate(UseStoreImmI16);
8315 match(Set mem (StoreC mem src));
8316
8317 ins_cost(150);
8318 format %{ "movw $mem, $src\t# short/char" %}
8319 ins_encode %{
8320 __ movw($mem$$Address, $src$$constant);
8321 %}
8322 ins_pipe(ialu_mem_imm);
8323 %}
8324
8325 // Store Byte Immediate
8326 instruct storeImmB0(memory mem, immI_0 zero)
8327 %{
8328 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8329 match(Set mem (StoreB mem zero));
8330
8331 ins_cost(125); // XXX
8332 format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
8333 ins_encode %{
8334 __ movb($mem$$Address, r12);
8335 %}
8336 ins_pipe(ialu_mem_reg);
8337 %}
8338
8339 instruct storeImmB(memory mem, immI8 src)
8340 %{
8341 match(Set mem (StoreB mem src));
8342
8343 ins_cost(150); // XXX
8344 format %{ "movb $mem, $src\t# byte" %}
8345 ins_encode %{
8346 __ movb($mem$$Address, $src$$constant);
8347 %}
8348 ins_pipe(ialu_mem_imm);
8349 %}
8350
8351 // Store Float
8352 instruct storeF(memory mem, regF src)
8353 %{
8354 match(Set mem (StoreF mem src));
8355
8356 ins_cost(95); // XXX
8357 format %{ "movss $mem, $src\t# float" %}
8358 ins_encode %{
8359 __ movflt($mem$$Address, $src$$XMMRegister);
8360 %}
8361 ins_pipe(pipe_slow); // XXX
8362 %}
8363
8364 // Store immediate Float value (it is faster than store from XMM register)
8365 instruct storeF0(memory mem, immF0 zero)
8366 %{
8367 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8368 match(Set mem (StoreF mem zero));
8369
8370 ins_cost(25); // XXX
8371 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8372 ins_encode %{
8373 __ movl($mem$$Address, r12);
8374 %}
8375 ins_pipe(ialu_mem_reg);
8376 %}
8377
8378 instruct storeF_imm(memory mem, immF src)
8379 %{
8380 match(Set mem (StoreF mem src));
8381
8382 ins_cost(50);
8383 format %{ "movl $mem, $src\t# float" %}
8384 ins_encode %{
8385 __ movl($mem$$Address, jint_cast($src$$constant));
8386 %}
8387 ins_pipe(ialu_mem_imm);
8388 %}
8389
8390 // Store Double
8391 instruct storeD(memory mem, regD src)
8392 %{
8393 match(Set mem (StoreD mem src));
8394
8395 ins_cost(95); // XXX
8396 format %{ "movsd $mem, $src\t# double" %}
8397 ins_encode %{
8398 __ movdbl($mem$$Address, $src$$XMMRegister);
8399 %}
8400 ins_pipe(pipe_slow); // XXX
8401 %}
8402
8403 // Store immediate double 0.0 (it is faster than store from XMM register)
8404 instruct storeD0_imm(memory mem, immD0 src)
8405 %{
8406 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8407 match(Set mem (StoreD mem src));
8408
8409 ins_cost(50);
8410 format %{ "movq $mem, $src\t# double 0." %}
8411 ins_encode %{
8412 __ movq($mem$$Address, $src$$constant);
8413 %}
8414 ins_pipe(ialu_mem_imm);
8415 %}
8416
8417 instruct storeD0(memory mem, immD0 zero)
8418 %{
8419 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8420 match(Set mem (StoreD mem zero));
8421
8422 ins_cost(25); // XXX
8423 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8424 ins_encode %{
8425 __ movq($mem$$Address, r12);
8426 %}
8427 ins_pipe(ialu_mem_reg);
8428 %}
8429
8430 instruct storeSSI(stackSlotI dst, rRegI src)
8431 %{
8432 match(Set dst src);
8433
8434 ins_cost(100);
8435 format %{ "movl $dst, $src\t# int stk" %}
8436 ins_encode %{
8437 __ movl($dst$$Address, $src$$Register);
8438 %}
8439 ins_pipe( ialu_mem_reg );
8440 %}
8441
8442 instruct storeSSL(stackSlotL dst, rRegL src)
8443 %{
8444 match(Set dst src);
8445
8446 ins_cost(100);
8447 format %{ "movq $dst, $src\t# long stk" %}
8448 ins_encode %{
8449 __ movq($dst$$Address, $src$$Register);
8450 %}
8451 ins_pipe(ialu_mem_reg);
8452 %}
8453
8454 instruct storeSSP(stackSlotP dst, rRegP src)
8455 %{
8456 match(Set dst src);
8457
8458 ins_cost(100);
8459 format %{ "movq $dst, $src\t# ptr stk" %}
8460 ins_encode %{
8461 __ movq($dst$$Address, $src$$Register);
8462 %}
8463 ins_pipe(ialu_mem_reg);
8464 %}
8465
8466 instruct storeSSF(stackSlotF dst, regF src)
8467 %{
8468 match(Set dst src);
8469
8470 ins_cost(95); // XXX
8471 format %{ "movss $dst, $src\t# float stk" %}
8472 ins_encode %{
8473 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8474 %}
8475 ins_pipe(pipe_slow); // XXX
8476 %}
8477
8478 instruct storeSSD(stackSlotD dst, regD src)
8479 %{
8480 match(Set dst src);
8481
8482 ins_cost(95); // XXX
8483 format %{ "movsd $dst, $src\t# double stk" %}
8484 ins_encode %{
8485 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8486 %}
8487 ins_pipe(pipe_slow); // XXX
8488 %}
8489
8490 instruct cacheWB(indirect addr)
8491 %{
8492 predicate(VM_Version::supports_data_cache_line_flush());
8493 match(CacheWB addr);
8494
8495 ins_cost(100);
8496 format %{"cache wb $addr" %}
8497 ins_encode %{
8498 assert($addr->index_position() < 0, "should be");
8499 assert($addr$$disp == 0, "should be");
8500 __ cache_wb(Address($addr$$base$$Register, 0));
8501 %}
8502 ins_pipe(pipe_slow); // XXX
8503 %}
8504
8505 instruct cacheWBPreSync()
8506 %{
8507 predicate(VM_Version::supports_data_cache_line_flush());
8508 match(CacheWBPreSync);
8509
8510 ins_cost(100);
8511 format %{"cache wb presync" %}
8512 ins_encode %{
8513 __ cache_wbsync(true);
8514 %}
8515 ins_pipe(pipe_slow); // XXX
8516 %}
8517
8518 instruct cacheWBPostSync()
8519 %{
8520 predicate(VM_Version::supports_data_cache_line_flush());
8521 match(CacheWBPostSync);
8522
8523 ins_cost(100);
8524 format %{"cache wb postsync" %}
8525 ins_encode %{
8526 __ cache_wbsync(false);
8527 %}
8528 ins_pipe(pipe_slow); // XXX
8529 %}
8530
8531 //----------BSWAP Instructions-------------------------------------------------
8532 instruct bytes_reverse_int(rRegI dst) %{
8533 match(Set dst (ReverseBytesI dst));
8534
8535 format %{ "bswapl $dst" %}
8536 ins_encode %{
8537 __ bswapl($dst$$Register);
8538 %}
8539 ins_pipe( ialu_reg );
8540 %}
8541
8542 instruct bytes_reverse_long(rRegL dst) %{
8543 match(Set dst (ReverseBytesL dst));
8544
8545 format %{ "bswapq $dst" %}
8546 ins_encode %{
8547 __ bswapq($dst$$Register);
8548 %}
8549 ins_pipe( ialu_reg);
8550 %}
8551
8552 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8553 match(Set dst (ReverseBytesUS dst));
8554 effect(KILL cr);
8555
8556 format %{ "bswapl $dst\n\t"
8557 "shrl $dst,16\n\t" %}
8558 ins_encode %{
8559 __ bswapl($dst$$Register);
8560 __ shrl($dst$$Register, 16);
8561 %}
8562 ins_pipe( ialu_reg );
8563 %}
8564
8565 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8566 match(Set dst (ReverseBytesS dst));
8567 effect(KILL cr);
8568
8569 format %{ "bswapl $dst\n\t"
8570 "sar $dst,16\n\t" %}
8571 ins_encode %{
8572 __ bswapl($dst$$Register);
8573 __ sarl($dst$$Register, 16);
8574 %}
8575 ins_pipe( ialu_reg );
8576 %}
8577
8578 //---------- Zeros Count Instructions ------------------------------------------
8579
8580 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8581 predicate(UseCountLeadingZerosInstruction);
8582 match(Set dst (CountLeadingZerosI src));
8583 effect(KILL cr);
8584
8585 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8586 ins_encode %{
8587 __ lzcntl($dst$$Register, $src$$Register);
8588 %}
8589 ins_pipe(ialu_reg);
8590 %}
8591
8592 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8593 predicate(UseCountLeadingZerosInstruction);
8594 match(Set dst (CountLeadingZerosI (LoadI src)));
8595 effect(KILL cr);
8596 ins_cost(175);
8597 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8598 ins_encode %{
8599 __ lzcntl($dst$$Register, $src$$Address);
8600 %}
8601 ins_pipe(ialu_reg_mem);
8602 %}
8603
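// Fallback when lzcnt is unavailable: for src != 0, bsr returns the index of
// the highest set bit, so clz = 31 - bsr(src) = -bsr(src) + 31 (for example,
// src = 0x00F00000 gives bsr = 23, hence clz = 8). For src == 0, bsr leaves
// dst undefined and sets ZF, so the branch loads -1 and the neg/add sequence
// then yields 32. The long variant further below works the same way with
// 63/64.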
8604 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8605 predicate(!UseCountLeadingZerosInstruction);
8606 match(Set dst (CountLeadingZerosI src));
8607 effect(KILL cr);
8608
8609 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8610 "jnz skip\n\t"
8611 "movl $dst, -1\n"
8612 "skip:\n\t"
8613 "negl $dst\n\t"
8614 "addl $dst, 31" %}
8615 ins_encode %{
8616 Register Rdst = $dst$$Register;
8617 Register Rsrc = $src$$Register;
8618 Label skip;
8619 __ bsrl(Rdst, Rsrc);
8620 __ jccb(Assembler::notZero, skip);
8621 __ movl(Rdst, -1);
8622 __ bind(skip);
8623 __ negl(Rdst);
8624 __ addl(Rdst, BitsPerInt - 1);
8625 %}
8626 ins_pipe(ialu_reg);
8627 %}
8628
8629 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8630 predicate(UseCountLeadingZerosInstruction);
8631 match(Set dst (CountLeadingZerosL src));
8632 effect(KILL cr);
8633
8634 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8635 ins_encode %{
8636 __ lzcntq($dst$$Register, $src$$Register);
8637 %}
8638 ins_pipe(ialu_reg);
8639 %}
8640
8641 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8642 predicate(UseCountLeadingZerosInstruction);
8643 match(Set dst (CountLeadingZerosL (LoadL src)));
8644 effect(KILL cr);
8645 ins_cost(175);
8646 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8647 ins_encode %{
8648 __ lzcntq($dst$$Register, $src$$Address);
8649 %}
8650 ins_pipe(ialu_reg_mem);
8651 %}
8652
8653 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8654 predicate(!UseCountLeadingZerosInstruction);
8655 match(Set dst (CountLeadingZerosL src));
8656 effect(KILL cr);
8657
8658 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8659 "jnz skip\n\t"
8660 "movl $dst, -1\n"
8661 "skip:\n\t"
8662 "negl $dst\n\t"
8663 "addl $dst, 63" %}
8664 ins_encode %{
8665 Register Rdst = $dst$$Register;
8666 Register Rsrc = $src$$Register;
8667 Label skip;
8668 __ bsrq(Rdst, Rsrc);
8669 __ jccb(Assembler::notZero, skip);
8670 __ movl(Rdst, -1);
8671 __ bind(skip);
8672 __ negl(Rdst);
8673 __ addl(Rdst, BitsPerLong - 1);
8674 %}
8675 ins_pipe(ialu_reg);
8676 %}
8677
8678 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8679 predicate(UseCountTrailingZerosInstruction);
8680 match(Set dst (CountTrailingZerosI src));
8681 effect(KILL cr);
8682
8683 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8684 ins_encode %{
8685 __ tzcntl($dst$$Register, $src$$Register);
8686 %}
8687 ins_pipe(ialu_reg);
8688 %}
8689
8690 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8691 predicate(UseCountTrailingZerosInstruction);
8692 match(Set dst (CountTrailingZerosI (LoadI src)));
8693 effect(KILL cr);
8694 ins_cost(175);
8695 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8696 ins_encode %{
8697 __ tzcntl($dst$$Register, $src$$Address);
8698 %}
8699 ins_pipe(ialu_reg_mem);
8700 %}
8701
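// Fallback when tzcnt is unavailable: for src != 0, bsf returns the index of
// the lowest set bit, which is exactly the trailing-zero count. For src == 0,
// bsf leaves dst undefined and sets ZF, so the branch loads the operand width
// (32 here, 64 in the long variant) instead.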
8702 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8703 predicate(!UseCountTrailingZerosInstruction);
8704 match(Set dst (CountTrailingZerosI src));
8705 effect(KILL cr);
8706
8707 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8708 "jnz done\n\t"
8709 "movl $dst, 32\n"
8710 "done:" %}
8711 ins_encode %{
8712 Register Rdst = $dst$$Register;
8713 Label done;
8714 __ bsfl(Rdst, $src$$Register);
8715 __ jccb(Assembler::notZero, done);
8716 __ movl(Rdst, BitsPerInt);
8717 __ bind(done);
8718 %}
8719 ins_pipe(ialu_reg);
8720 %}
8721
8722 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8723 predicate(UseCountTrailingZerosInstruction);
8724 match(Set dst (CountTrailingZerosL src));
8725 effect(KILL cr);
8726
8727 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8728 ins_encode %{
8729 __ tzcntq($dst$$Register, $src$$Register);
8730 %}
8731 ins_pipe(ialu_reg);
8732 %}
8733
8734 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8735 predicate(UseCountTrailingZerosInstruction);
8736 match(Set dst (CountTrailingZerosL (LoadL src)));
8737 effect(KILL cr);
8738 ins_cost(175);
8739 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8740 ins_encode %{
8741 __ tzcntq($dst$$Register, $src$$Address);
8742 %}
8743 ins_pipe(ialu_reg_mem);
8744 %}
8745
8746 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8747 predicate(!UseCountTrailingZerosInstruction);
8748 match(Set dst (CountTrailingZerosL src));
8749 effect(KILL cr);
8750
8751 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8752 "jnz done\n\t"
8753 "movl $dst, 64\n"
8754 "done:" %}
8755 ins_encode %{
8756 Register Rdst = $dst$$Register;
8757 Label done;
8758 __ bsfq(Rdst, $src$$Register);
8759 __ jccb(Assembler::notZero, done);
8760 __ movl(Rdst, BitsPerLong);
8761 __ bind(done);
8762 %}
8763 ins_pipe(ialu_reg);
8764 %}
8765
8766 //--------------- Reverse Operation Instructions ----------------
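// Bit reversal of an int/long. Without GFNI, reverseI/reverseL fall back to a
// shift-and-mask swap sequence in general-purpose registers. With GFNI, the
// gf2p8affineqb instruction can reverse the bits within each byte in a single
// operation (given a suitable 8x8 bit matrix), and a byte swap then reverses
// the whole word - hence the XMM temporaries in the _gfni variants.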
8767 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8768 predicate(!VM_Version::supports_gfni());
8769 match(Set dst (ReverseI src));
8770 effect(TEMP dst, TEMP rtmp, KILL cr);
8771 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8772 ins_encode %{
8773 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8774 %}
8775 ins_pipe( ialu_reg );
8776 %}
8777
8778 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8779 predicate(VM_Version::supports_gfni());
8780 match(Set dst (ReverseI src));
8781 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8782 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8783 ins_encode %{
8784 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8785 %}
8786 ins_pipe( ialu_reg );
8787 %}
8788
8789 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8790 predicate(!VM_Version::supports_gfni());
8791 match(Set dst (ReverseL src));
8792 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8793 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8794 ins_encode %{
8795 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8796 %}
8797 ins_pipe( ialu_reg );
8798 %}
8799
8800 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8801 predicate(VM_Version::supports_gfni());
8802 match(Set dst (ReverseL src));
8803 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8804 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8805 ins_encode %{
8806 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8807 %}
8808 ins_pipe( ialu_reg );
8809 %}
8810
8811 //---------- Population Count Instructions -------------------------------------
8812
8813 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8814 predicate(UsePopCountInstruction);
8815 match(Set dst (PopCountI src));
8816 effect(KILL cr);
8817
8818 format %{ "popcnt $dst, $src" %}
8819 ins_encode %{
8820 __ popcntl($dst$$Register, $src$$Register);
8821 %}
8822 ins_pipe(ialu_reg);
8823 %}
8824
8825 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8826 predicate(UsePopCountInstruction);
8827 match(Set dst (PopCountI (LoadI mem)));
8828 effect(KILL cr);
8829
8830 format %{ "popcnt $dst, $mem" %}
8831 ins_encode %{
8832 __ popcntl($dst$$Register, $mem$$Address);
8833 %}
8834 ins_pipe(ialu_reg);
8835 %}
8836
8837 // Note: Long.bitCount(long) returns an int.
8838 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8839 predicate(UsePopCountInstruction);
8840 match(Set dst (PopCountL src));
8841 effect(KILL cr);
8842
8843 format %{ "popcnt $dst, $src" %}
8844 ins_encode %{
8845 __ popcntq($dst$$Register, $src$$Register);
8846 %}
8847 ins_pipe(ialu_reg);
8848 %}
8849
8850 // Note: Long.bitCount(long) returns an int.
8851 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8852 predicate(UsePopCountInstruction);
8853 match(Set dst (PopCountL (LoadL mem)));
8854 effect(KILL cr);
8855
8856 format %{ "popcnt $dst, $mem" %}
8857 ins_encode %{
8858 __ popcntq($dst$$Register, $mem$$Address);
8859 %}
8860 ins_pipe(ialu_reg);
8861 %}
8862
8864 //----------MemBar Instructions-----------------------------------------------
8865 // Memory barrier flavors
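// x86 implements a TSO memory model: ordinary loads and stores already give
// acquire and release ordering, and only StoreLoad reordering has to be
// prevented by an actual instruction. That is why the acquire/release
// flavors below have empty encodings while the StoreLoad-style barriers
// emit code.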
8866
8867 instruct membar_acquire()
8868 %{
8869 match(MemBarAcquire);
8870 match(LoadFence);
8871 ins_cost(0);
8872
8873 size(0);
8874 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8875 ins_encode();
8876 ins_pipe(empty);
8877 %}
8878
8879 instruct membar_acquire_lock()
8880 %{
8881 match(MemBarAcquireLock);
8882 ins_cost(0);
8883
8884 size(0);
8885 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8886 ins_encode();
8887 ins_pipe(empty);
8888 %}
8889
8890 instruct membar_release()
8891 %{
8892 match(MemBarRelease);
8893 match(StoreFence);
8894 ins_cost(0);
8895
8896 size(0);
8897 format %{ "MEMBAR-release ! (empty encoding)" %}
8898 ins_encode();
8899 ins_pipe(empty);
8900 %}
8901
8902 instruct membar_release_lock()
8903 %{
8904 match(MemBarReleaseLock);
8905 ins_cost(0);
8906
8907 size(0);
8908 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8909 ins_encode();
8910 ins_pipe(empty);
8911 %}
8912
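// The StoreLoad barrier is implemented as a locked read-modify-write of a
// dead stack location (lock addl [rsp], 0), which acts as a full fence and
// is typically cheaper than mfence on current hardware.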
8913 instruct membar_storeload(rFlagsReg cr) %{
8914 match(MemBarStoreLoad);
8915 effect(KILL cr);
8916 ins_cost(400);
8917
8918 format %{
8919 $$template
8920 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8921 %}
8922 ins_encode %{
8923 __ membar(Assembler::StoreLoad);
8924 %}
8925 ins_pipe(pipe_slow);
8926 %}
8927
8928 instruct membar_volatile(rFlagsReg cr) %{
8929 match(MemBarVolatile);
8930 effect(KILL cr);
8931 ins_cost(400);
8932
8933 format %{
8934 $$template
8935 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8936 %}
8937 ins_encode %{
8938 __ membar(Assembler::StoreLoad);
8939 %}
8940 ins_pipe(pipe_slow);
8941 %}
8942
8943 instruct unnecessary_membar_volatile()
8944 %{
8945 match(MemBarVolatile);
8946 predicate(Matcher::post_store_load_barrier(n));
8947 ins_cost(0);
8948
8949 size(0);
8950 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8951 ins_encode();
8952 ins_pipe(empty);
8953 %}
8954
8955 instruct membar_full(rFlagsReg cr) %{
8956 match(MemBarFull);
8957 effect(KILL cr);
8958 ins_cost(400);
8959
8960 format %{
8961 $$template
8962 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8963 %}
8964 ins_encode %{
8965 __ membar(Assembler::StoreLoad);
8966 %}
8967 ins_pipe(pipe_slow);
8968 %}
8969
8970 instruct membar_storestore() %{
8971 match(MemBarStoreStore);
8972 match(StoreStoreFence);
8973 ins_cost(0);
8974
8975 size(0);
8976 format %{ "MEMBAR-storestore (empty encoding)" %}
8977 ins_encode();
8978 ins_pipe(empty);
8979 %}
8980
8981 //----------Move Instructions--------------------------------------------------
8982
8983 instruct castX2P(rRegP dst, rRegL src)
8984 %{
8985 match(Set dst (CastX2P src));
8986
8987 format %{ "movq $dst, $src\t# long->ptr" %}
8988 ins_encode %{
8989 if ($dst$$reg != $src$$reg) {
8990 __ movptr($dst$$Register, $src$$Register);
8991 }
8992 %}
8993 ins_pipe(ialu_reg_reg); // XXX
8994 %}
8995
8996 instruct castI2N(rRegN dst, rRegI src)
8997 %{
8998 match(Set dst (CastI2N src));
8999
9000 format %{ "movl $dst, $src\t# int -> narrow ptr" %}
9001 ins_encode %{
9002 if ($dst$$reg != $src$$reg) {
9003 __ movl($dst$$Register, $src$$Register);
9004 }
9005 %}
9006 ins_pipe(ialu_reg_reg); // XXX
9007 %}
9008
9009 instruct castN2X(rRegL dst, rRegN src)
9010 %{
9011 match(Set dst (CastP2X src));
9012
9013 format %{ "movq $dst, $src\t# narrow ptr -> long" %}
9014 ins_encode %{
9015 if ($dst$$reg != $src$$reg) {
9016 __ movptr($dst$$Register, $src$$Register);
9017 }
9018 %}
9019 ins_pipe(ialu_reg_reg); // XXX
9020 %}
9021
9022 instruct castP2X(rRegL dst, rRegP src)
9023 %{
9024 match(Set dst (CastP2X src));
9025
9026 format %{ "movq $dst, $src\t# ptr -> long" %}
9027 ins_encode %{
9028 if ($dst$$reg != $src$$reg) {
9029 __ movptr($dst$$Register, $src$$Register);
9030 }
9031 %}
9032 ins_pipe(ialu_reg_reg); // XXX
9033 %}
9034
9035 // Convert oop into int for vector alignment masking
9036 instruct convP2I(rRegI dst, rRegP src)
9037 %{
9038 match(Set dst (ConvL2I (CastP2X src)));
9039
9040 format %{ "movl $dst, $src\t# ptr -> int" %}
9041 ins_encode %{
9042 __ movl($dst$$Register, $src$$Register);
9043 %}
9044 ins_pipe(ialu_reg_reg); // XXX
9045 %}
9046
9047 // Convert compressed oop into int for vector alignment masking
9048 // in the case of 32-bit oops (heap < 4GB).
9049 instruct convN2I(rRegI dst, rRegN src)
9050 %{
9051 predicate(CompressedOops::shift() == 0);
9052 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
9053
9054 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
9055 ins_encode %{
9056 __ movl($dst$$Register, $src$$Register);
9057 %}
9058 ins_pipe(ialu_reg_reg); // XXX
9059 %}
9060
9061 // Convert oop pointer into compressed form
9062 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
9063 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9064 match(Set dst (EncodeP src));
9065 effect(KILL cr);
9066 format %{ "encode_heap_oop $dst,$src" %}
9067 ins_encode %{
9068 Register s = $src$$Register;
9069 Register d = $dst$$Register;
9070 if (s != d) {
9071 __ movq(d, s);
9072 }
9073 __ encode_heap_oop(d);
9074 %}
9075 ins_pipe(ialu_reg_long);
9076 %}
9077
9078 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9079 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9080 match(Set dst (EncodeP src));
9081 effect(KILL cr);
9082 format %{ "encode_heap_oop_not_null $dst,$src" %}
9083 ins_encode %{
9084 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9085 %}
9086 ins_pipe(ialu_reg_long);
9087 %}
9088
9089 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9090 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9091 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9092 match(Set dst (DecodeN src));
9093 effect(KILL cr);
9094 format %{ "decode_heap_oop $dst,$src" %}
9095 ins_encode %{
9096 Register s = $src$$Register;
9097 Register d = $dst$$Register;
9098 if (s != d) {
9099 __ movq(d, s);
9100 }
9101 __ decode_heap_oop(d);
9102 %}
9103 ins_pipe(ialu_reg_long);
9104 %}
9105
9106 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9107 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9108 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9109 match(Set dst (DecodeN src));
9110 effect(KILL cr);
9111 format %{ "decode_heap_oop_not_null $dst,$src" %}
9112 ins_encode %{
9113 Register s = $src$$Register;
9114 Register d = $dst$$Register;
9115 if (s != d) {
9116 __ decode_heap_oop_not_null(d, s);
9117 } else {
9118 __ decode_heap_oop_not_null(d);
9119 }
9120 %}
9121 ins_pipe(ialu_reg_long);
9122 %}
9123
9124 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9125 match(Set dst (EncodePKlass src));
9126 effect(TEMP dst, KILL cr);
9127 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9128 ins_encode %{
9129 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9130 %}
9131 ins_pipe(ialu_reg_long);
9132 %}
9133
9134 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9135 match(Set dst (DecodeNKlass src));
9136 effect(TEMP dst, KILL cr);
9137 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9138 ins_encode %{
9139 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9140 %}
9141 ins_pipe(ialu_reg_long);
9142 %}
9143
9144 //----------Conditional Move---------------------------------------------------
9145 // Jump (jump-table dispatch)
9146 // The 'dest' operand is a dummy whose only purpose is to get a temp register.
9147 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9148 match(Jump (LShiftL switch_val shift));
9149 ins_cost(350);
9150 predicate(false);
9151 effect(TEMP dest);
9152
9153 format %{ "leaq $dest, [$constantaddress]\n\t"
9154 "jmp [$dest + $switch_val << $shift]" %}
9155 ins_encode %{
9156 // We could use jump(ArrayAddress), except that the macro assembler needs r10
9157 // to do that and the compiler may already have allocated r10 for other uses.
9158 // So we build the dispatch address by hand.
9159 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9160 // ArrayAddress dispatch(table, index);
9161 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9162 __ lea($dest$$Register, $constantaddress);
9163 __ jmp(dispatch);
9164 %}
9165 ins_pipe(pipe_jmp);
9166 %}
9167
9168 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9169 match(Jump (AddL (LShiftL switch_val shift) offset));
9170 ins_cost(350);
9171 effect(TEMP dest);
9172
9173 format %{ "leaq $dest, [$constantaddress]\n\t"
9174 "jmp [$dest + $switch_val << $shift + $offset]" %}
9175 ins_encode %{
9176 // We could use jump(ArrayAddress), except that the macro assembler needs r10
9177 // to do that and the compiler may already have allocated r10 for other uses.
9178 // So we build the dispatch address by hand.
9179 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9180 // ArrayAddress dispatch(table, index);
9181 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9182 __ lea($dest$$Register, $constantaddress);
9183 __ jmp(dispatch);
9184 %}
9185 ins_pipe(pipe_jmp);
9186 %}
9187
9188 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9189 match(Jump switch_val);
9190 ins_cost(350);
9191 effect(TEMP dest);
9192
9193 format %{ "leaq $dest, [$constantaddress]\n\t"
9194 "jmp [$dest + $switch_val]" %}
9195 ins_encode %{
9196 // We could use jump(ArrayAddress), except that the macro assembler needs r10
9197 // to do that and the compiler may already have allocated r10 for other uses.
9198 // So we build the dispatch address by hand.
9199 // Address index(noreg, switch_reg, Address::times_1);
9200 // ArrayAddress dispatch(table, index);
9201 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9202 __ lea($dest$$Register, $constantaddress);
9203 __ jmp(dispatch);
9204 %}
9205 ins_pipe(pipe_jmp);
9206 %}
9207
9208 // Conditional move
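// The *_imm_01 variants below handle the case where the two selected values
// are the constants 1 and 0: in the matched shape the 1 is taken when the
// condition fails and the 0 when it holds, so the result is !condition and a
// single setb of the negated condition ("setbn") suffices.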
9209 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9210 %{
9211 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9212 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9213
9214 ins_cost(100); // XXX
9215 format %{ "setbn$cop $dst\t# signed, int" %}
9216 ins_encode %{
9217 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9218 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9219 %}
9220 ins_pipe(ialu_reg);
9221 %}
9222
9223 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9224 %{
9225 predicate(!UseAPX);
9226 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9227
9228 ins_cost(200); // XXX
9229 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9230 ins_encode %{
9231 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9232 %}
9233 ins_pipe(pipe_cmov_reg);
9234 %}
9235
9236 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9237 %{
9238 predicate(UseAPX);
9239 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9240
9241 ins_cost(200);
9242 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9243 ins_encode %{
9244 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9245 %}
9246 ins_pipe(pipe_cmov_reg);
9247 %}
9248
9249 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9250 %{
9251 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9252 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9253
9254 ins_cost(100); // XXX
9255 format %{ "setbn$cop $dst\t# unsigned, int" %}
9256 ins_encode %{
9257 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9258 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9259 %}
9260 ins_pipe(ialu_reg);
9261 %}
9262
9263 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9264 predicate(!UseAPX);
9265 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9266
9267 ins_cost(200); // XXX
9268 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9269 ins_encode %{
9270 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9271 %}
9272 ins_pipe(pipe_cmov_reg);
9273 %}
9274
9275 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9276 predicate(UseAPX);
9277 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9278
9279 ins_cost(200);
9280 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9281 ins_encode %{
9282 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9283 %}
9284 ins_pipe(pipe_cmov_reg);
9285 %}
9286
9287 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9288 %{
9289 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9290 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9291
9292 ins_cost(100); // XXX
9293 format %{ "setbn$cop $dst\t# unsigned, int" %}
9294 ins_encode %{
9295 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9296 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9297 %}
9298 ins_pipe(ialu_reg);
9299 %}
9300
9301 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9302 %{
9303 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9304 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9305
9306 ins_cost(100); // XXX
9307 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9308 ins_encode %{
9309 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9310 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9311 %}
9312 ins_pipe(ialu_reg);
9313 %}
9314
9315 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9316 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9317
9318 ins_cost(200);
9319 expand %{
9320 cmovI_regU(cop, cr, dst, src);
9321 %}
9322 %}
9323
9324 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9325 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9326
9327 ins_cost(200);
9328 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9329 ins_encode %{
9330 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9331 %}
9332 ins_pipe(pipe_cmov_reg);
9333 %}
9334
9335 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9336 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9337 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9338
9339 ins_cost(200); // XXX
9340 format %{ "cmovpl $dst, $src\n\t"
9341 "cmovnel $dst, $src" %}
9342 ins_encode %{
9343 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9344 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9345 %}
9346 ins_pipe(pipe_cmov_reg);
9347 %}
9348
9349 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9350 // inputs of the CMove
9351 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9352 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9353 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9354 effect(TEMP dst);
9355
9356 ins_cost(200); // XXX
9357 format %{ "cmovpl $dst, $src\n\t"
9358 "cmovnel $dst, $src" %}
9359 ins_encode %{
9360 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9361 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9362 %}
9363 ins_pipe(pipe_cmov_reg);
9364 %}
9365
9366 // Conditional move
9367 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9368 predicate(!UseAPX);
9369 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9370
9371 ins_cost(250); // XXX
9372 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9373 ins_encode %{
9374 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9375 %}
9376 ins_pipe(pipe_cmov_mem);
9377 %}
9378
9379 // Conditional move
9380 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9381 %{
9382 predicate(UseAPX);
9383 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9384
9385 ins_cost(250);
9386 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9387 ins_encode %{
9388 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9389 %}
9390 ins_pipe(pipe_cmov_mem);
9391 %}
9392
9393 // Conditional move
9394 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9395 %{
9396 predicate(!UseAPX);
9397 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9398
9399 ins_cost(250); // XXX
9400 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9401 ins_encode %{
9402 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9403 %}
9404 ins_pipe(pipe_cmov_mem);
9405 %}
9406
9407 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9408 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9409
9410 ins_cost(250);
9411 expand %{
9412 cmovI_memU(cop, cr, dst, src);
9413 %}
9414 %}
9415
9416 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9417 %{
9418 predicate(UseAPX);
9419 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9420
9421 ins_cost(250);
9422 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9423 ins_encode %{
9424 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9425 %}
9426 ins_pipe(pipe_cmov_mem);
9427 %}
9428
9429 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9430 %{
9431 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9432
9433 ins_cost(250);
9434 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9435 ins_encode %{
9436 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9437 %}
9438 ins_pipe(pipe_cmov_mem);
9439 %}
9440
9441 // Conditional move
9442 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9443 %{
9444 predicate(!UseAPX);
9445 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9446
9447 ins_cost(200); // XXX
9448 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9449 ins_encode %{
9450 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9451 %}
9452 ins_pipe(pipe_cmov_reg);
9453 %}
9454
9455 // Conditional move ndd
9456 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9457 %{
9458 predicate(UseAPX);
9459 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9460
9461 ins_cost(200);
9462 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9463 ins_encode %{
9464 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9465 %}
9466 ins_pipe(pipe_cmov_reg);
9467 %}
9468
9469 // Conditional move
9470 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9471 %{
9472 predicate(!UseAPX);
9473 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9474
9475 ins_cost(200); // XXX
9476 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9477 ins_encode %{
9478 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9479 %}
9480 ins_pipe(pipe_cmov_reg);
9481 %}
9482
9483 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9484 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9485
9486 ins_cost(200);
9487 expand %{
9488 cmovN_regU(cop, cr, dst, src);
9489 %}
9490 %}
9491
9492 // Conditional move ndd
9493 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9494 %{
9495 predicate(UseAPX);
9496 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9497
9498 ins_cost(200);
9499 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9500 ins_encode %{
9501 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9502 %}
9503 ins_pipe(pipe_cmov_reg);
9504 %}
9505
9506 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9507 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9508
9509 ins_cost(200);
9510 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9511 ins_encode %{
9512 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9513 %}
9514 ins_pipe(pipe_cmov_reg);
9515 %}
9516
9517 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9518 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9519 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9520
9521 ins_cost(200); // XXX
9522 format %{ "cmovpl $dst, $src\n\t"
9523 "cmovnel $dst, $src" %}
9524 ins_encode %{
9525 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9526 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9527 %}
9528 ins_pipe(pipe_cmov_reg);
9529 %}
9530
9531 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9532 // inputs of the CMove
9533 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9534 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9535 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9536
9537 ins_cost(200); // XXX
9538 format %{ "cmovpl $dst, $src\n\t"
9539 "cmovnel $dst, $src" %}
9540 ins_encode %{
9541 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9542 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9543 %}
9544 ins_pipe(pipe_cmov_reg);
9545 %}
9546
9547 // Conditional move
9548 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9549 %{
9550 predicate(!UseAPX);
9551 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9552
9553 ins_cost(200); // XXX
9554 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9555 ins_encode %{
9556 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9557 %}
9558 ins_pipe(pipe_cmov_reg); // XXX
9559 %}
9560
9561 // Conditional move ndd
9562 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9563 %{
9564 predicate(UseAPX);
9565 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9566
9567 ins_cost(200);
9568 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9569 ins_encode %{
9570 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9571 %}
9572 ins_pipe(pipe_cmov_reg);
9573 %}
9574
9575 // Conditional move
9576 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9577 %{
9578 predicate(!UseAPX);
9579 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9580
9581 ins_cost(200); // XXX
9582 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9583 ins_encode %{
9584 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9585 %}
9586 ins_pipe(pipe_cmov_reg); // XXX
9587 %}
9588
9589 // Conditional move ndd
9590 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9591 %{
9592 predicate(UseAPX);
9593 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9594
9595 ins_cost(200);
9596 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9597 ins_encode %{
9598 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9599 %}
9600 ins_pipe(pipe_cmov_reg);
9601 %}
9602
9603 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9604 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9605
9606 ins_cost(200);
9607 expand %{
9608 cmovP_regU(cop, cr, dst, src);
9609 %}
9610 %}
9611
9612 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9613 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9614
9615 ins_cost(200);
9616 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9617 ins_encode %{
9618 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9619 %}
9620 ins_pipe(pipe_cmov_reg);
9621 %}
9622
9623 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9624 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9625 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9626
9627 ins_cost(200); // XXX
9628 format %{ "cmovpq $dst, $src\n\t"
9629 "cmovneq $dst, $src" %}
9630 ins_encode %{
9631 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9632 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9633 %}
9634 ins_pipe(pipe_cmov_reg);
9635 %}
9636
9637 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9638 // inputs of the CMove
9639 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9640 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9641 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9642
9643 ins_cost(200); // XXX
9644 format %{ "cmovpq $dst, $src\n\t"
9645 "cmovneq $dst, $src" %}
9646 ins_encode %{
9647 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9648 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9649 %}
9650 ins_pipe(pipe_cmov_reg);
9651 %}
9652
9653 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9654 %{
9655 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9656 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9657
9658 ins_cost(100); // XXX
9659 format %{ "setbn$cop $dst\t# signed, long" %}
9660 ins_encode %{
9661 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9662 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9663 %}
9664 ins_pipe(ialu_reg);
9665 %}
9666
9667 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9668 %{
9669 predicate(!UseAPX);
9670 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9671
9672 ins_cost(200); // XXX
9673 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9674 ins_encode %{
9675 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9676 %}
9677 ins_pipe(pipe_cmov_reg); // XXX
9678 %}
9679
9680 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9681 %{
9682 predicate(UseAPX);
9683 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9684
9685 ins_cost(200);
9686 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9687 ins_encode %{
9688 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9689 %}
9690 ins_pipe(pipe_cmov_reg);
9691 %}
9692
9693 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9694 %{
9695 predicate(!UseAPX);
9696 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9697
9698 ins_cost(200); // XXX
9699 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9700 ins_encode %{
9701 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9702 %}
9703 ins_pipe(pipe_cmov_mem); // XXX
9704 %}
9705
9706 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9707 %{
9708 predicate(UseAPX);
9709 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9710
9711 ins_cost(200);
9712 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9713 ins_encode %{
9714 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9715 %}
9716 ins_pipe(pipe_cmov_mem);
9717 %}
9718
9719 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9720 %{
9721 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9722 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9723
9724 ins_cost(100); // XXX
9725 format %{ "setbn$cop $dst\t# unsigned, long" %}
9726 ins_encode %{
9727 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9728 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9729 %}
9730 ins_pipe(ialu_reg);
9731 %}
9732
9733 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9734 %{
9735 predicate(!UseAPX);
9736 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9737
9738 ins_cost(200); // XXX
9739 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9740 ins_encode %{
9741 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9742 %}
9743 ins_pipe(pipe_cmov_reg); // XXX
9744 %}
9745
9746 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9747 %{
9748 predicate(UseAPX);
9749 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9750
9751 ins_cost(200);
9752 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9753 ins_encode %{
9754 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9755 %}
9756 ins_pipe(pipe_cmov_reg);
9757 %}
9758
9759 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9760 %{
9761 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9762 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9763
9764 ins_cost(100); // XXX
9765 format %{ "setbn$cop $dst\t# unsigned, long" %}
9766 ins_encode %{
9767 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9768 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9769 %}
9770 ins_pipe(ialu_reg);
9771 %}
9772
9773 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9774 %{
9775 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9776 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9777
9778 ins_cost(100); // XXX
9779 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9780 ins_encode %{
9781 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9782 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9783 %}
9784 ins_pipe(ialu_reg);
9785 %}
9786
9787 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9788 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9789
9790 ins_cost(200);
9791 expand %{
9792 cmovL_regU(cop, cr, dst, src);
9793 %}
9794 %}
9795
9796 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9797 %{
9798 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9799
9800 ins_cost(200);
9801 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9802 ins_encode %{
9803 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9804 %}
9805 ins_pipe(pipe_cmov_reg);
9806 %}
9807
9808 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9809 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9810 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9811
9812 ins_cost(200); // XXX
9813 format %{ "cmovpq $dst, $src\n\t"
9814 "cmovneq $dst, $src" %}
9815 ins_encode %{
9816 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9817 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9818 %}
9819 ins_pipe(pipe_cmov_reg);
9820 %}
9821
9822 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9823 // inputs of the CMove
9824 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9825 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9826 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9827
9828 ins_cost(200); // XXX
9829 format %{ "cmovpq $dst, $src\n\t"
9830 "cmovneq $dst, $src" %}
9831 ins_encode %{
9832 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9833 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9834 %}
9835 ins_pipe(pipe_cmov_reg);
9836 %}
9837
9838 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9839 %{
9840 predicate(!UseAPX);
9841 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9842
9843 ins_cost(200); // XXX
9844 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9845 ins_encode %{
9846 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9847 %}
9848 ins_pipe(pipe_cmov_mem); // XXX
9849 %}
9850
9851 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9852 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9853
9854 ins_cost(200);
9855 expand %{
9856 cmovL_memU(cop, cr, dst, src);
9857 %}
9858 %}
9859
9860 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9861 %{
9862 predicate(UseAPX);
9863 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9864
9865 ins_cost(200);
9866 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9867 ins_encode %{
9868 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9869 %}
9870 ins_pipe(pipe_cmov_mem);
9871 %}
9872
9873 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9874 %{
9875 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9876
9877 ins_cost(200);
9878 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9879 ins_encode %{
9880 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9881 %}
9882 ins_pipe(pipe_cmov_mem);
9883 %}
9884
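// SSE/AVX provide no conditional move for XMM registers, so float and double
// cmoves are emitted as a short forward branch on the negated condition
// around the register-to-register move.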
9885 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9886 %{
9887 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9888
9889 ins_cost(200); // XXX
9890 format %{ "jn$cop skip\t# signed cmove float\n\t"
9891 "movss $dst, $src\n"
9892 "skip:" %}
9893 ins_encode %{
9894 Label Lskip;
9895 // Invert sense of branch from sense of CMOV
9896 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9897 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9898 __ bind(Lskip);
9899 %}
9900 ins_pipe(pipe_slow);
9901 %}
9902
9903 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9904 %{
9905 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9906
9907 ins_cost(200); // XXX
9908 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9909 "movss $dst, $src\n"
9910 "skip:" %}
9911 ins_encode %{
9912 Label Lskip;
9913 // Invert sense of branch from sense of CMOV
9914 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9915 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9916 __ bind(Lskip);
9917 %}
9918 ins_pipe(pipe_slow);
9919 %}
9920
9921 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9922 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9923
9924 ins_cost(200);
9925 expand %{
9926 cmovF_regU(cop, cr, dst, src);
9927 %}
9928 %}
9929
9930 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9931 %{
9932 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9933
9934 ins_cost(200); // XXX
9935 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9936 "movss $dst, $src\n"
9937 "skip:" %}
9938 ins_encode %{
9939 Label Lskip;
9940 // Invert sense of branch from sense of CMOV
9941 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9942 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9943 __ bind(Lskip);
9944 %}
9945 ins_pipe(pipe_slow);
9946 %}
9947
9948 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9949 %{
9950 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9951
9952 ins_cost(200); // XXX
9953 format %{ "jn$cop skip\t# signed cmove double\n\t"
9954 "movsd $dst, $src\n"
9955 "skip:" %}
9956 ins_encode %{
9957 Label Lskip;
9958 // Invert sense of branch from sense of CMOV
9959 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9960 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9961 __ bind(Lskip);
9962 %}
9963 ins_pipe(pipe_slow);
9964 %}
9965
9966 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9967 %{
9968 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9969
9970 ins_cost(200); // XXX
9971 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9972 "movsd $dst, $src\n"
9973 "skip:" %}
9974 ins_encode %{
9975 Label Lskip;
9976 // Invert sense of branch from sense of CMOV
9977 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9978 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9979 __ bind(Lskip);
9980 %}
9981 ins_pipe(pipe_slow);
9982 %}
9983
9984 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9985 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9986
9987 ins_cost(200);
9988 expand %{
9989 cmovD_regU(cop, cr, dst, src);
9990 %}
9991 %}
9992
9993 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9994 %{
9995 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9996
9997 ins_cost(200); // XXX
9998 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9999 "movsd $dst, $src\n"
10000 "skip:" %}
10001 ins_encode %{
10002 Label Lskip;
10003 // Invert sense of branch from sense of CMOV
10004 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
10005 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
10006 __ bind(Lskip);
10007 %}
10008 ins_pipe(pipe_slow);
10009 %}
10010
10011 //----------Arithmetic Instructions--------------------------------------------
10012 //----------Addition Instructions----------------------------------------------
10013
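// With UseAPX, the "ndd" variants use the APX new-data-destination (NDD)
// three-operand encodings, computing dst = src1 op src2 without destroying a
// source register. The trailing 'false' passed to the e-prefixed assembler
// helpers is assumed to be the no_flags (NF) selector; it stays false here
// because these patterns advertise their flag effects via KILL cr and
// flag(...).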
10014 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10015 %{
10016 predicate(!UseAPX);
10017 match(Set dst (AddI dst src));
10018 effect(KILL cr);
10019 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10020 format %{ "addl $dst, $src\t# int" %}
10021 ins_encode %{
10022 __ addl($dst$$Register, $src$$Register);
10023 %}
10024 ins_pipe(ialu_reg_reg);
10025 %}
10026
10027 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10028 %{
10029 predicate(UseAPX);
10030 match(Set dst (AddI src1 src2));
10031 effect(KILL cr);
10032 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10033
10034 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10035 ins_encode %{
10036 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
10037 %}
10038 ins_pipe(ialu_reg_reg);
10039 %}
10040
10041 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10042 %{
10043 predicate(!UseAPX);
10044 match(Set dst (AddI dst src));
10045 effect(KILL cr);
10046 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10047
10048 format %{ "addl $dst, $src\t# int" %}
10049 ins_encode %{
10050 __ addl($dst$$Register, $src$$constant);
10051 %}
10052 ins_pipe( ialu_reg );
10053 %}
10054
10055 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10056 %{
10057 predicate(UseAPX);
10058 match(Set dst (AddI src1 src2));
10059 effect(KILL cr);
10060 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10061
10062 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10063 ins_encode %{
10064 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
10065 %}
10066 ins_pipe( ialu_reg );
10067 %}
10068
10069 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
10070 %{
10071 predicate(UseAPX);
10072 match(Set dst (AddI (LoadI src1) src2));
10073 effect(KILL cr);
10074 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10075
10076 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10077 ins_encode %{
10078 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
10079 %}
10080 ins_pipe( ialu_reg );
10081 %}
10082
10083 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10084 %{
10085 predicate(!UseAPX);
10086 match(Set dst (AddI dst (LoadI src)));
10087 effect(KILL cr);
10088 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10089
10090 ins_cost(150); // XXX
10091 format %{ "addl $dst, $src\t# int" %}
10092 ins_encode %{
10093 __ addl($dst$$Register, $src$$Address);
10094 %}
10095 ins_pipe(ialu_reg_mem);
10096 %}
10097
10098 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10099 %{
10100 predicate(UseAPX);
10101 match(Set dst (AddI src1 (LoadI src2)));
10102 effect(KILL cr);
10103 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10104
10105 ins_cost(150);
10106 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10107 ins_encode %{
10108 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10109 %}
10110 ins_pipe(ialu_reg_mem);
10111 %}
10112
10113 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10114 %{
10115 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10116 effect(KILL cr);
10117 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10118
10119 ins_cost(150); // XXX
10120 format %{ "addl $dst, $src\t# int" %}
10121 ins_encode %{
10122 __ addl($dst$$Address, $src$$Register);
10123 %}
10124 ins_pipe(ialu_mem_reg);
10125 %}
10126
10127 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10128 %{
10129 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10130 effect(KILL cr);
10131 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10132
10134 ins_cost(125); // XXX
10135 format %{ "addl $dst, $src\t# int" %}
10136 ins_encode %{
10137 __ addl($dst$$Address, $src$$constant);
10138 %}
10139 ins_pipe(ialu_mem_imm);
10140 %}
10141
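// inc/dec encode one byte shorter than add/sub with an immediate 1 but,
// unlike add/sub, leave CF unchanged (only OF/SF/ZF/AF/PF are updated).
// That is why these forms are guarded by UseIncDec and carry none of the
// Flag_sets_* annotations used by the add patterns above.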
10142 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10143 %{
10144 predicate(!UseAPX && UseIncDec);
10145 match(Set dst (AddI dst src));
10146 effect(KILL cr);
10147
10148 format %{ "incl $dst\t# int" %}
10149 ins_encode %{
10150 __ incrementl($dst$$Register);
10151 %}
10152 ins_pipe(ialu_reg);
10153 %}
10154
10155 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10156 %{
10157 predicate(UseAPX && UseIncDec);
10158 match(Set dst (AddI src val));
10159 effect(KILL cr);
10160 flag(PD::Flag_ndd_demotable_opr1);
10161
10162 format %{ "eincl $dst, $src\t# int ndd" %}
10163 ins_encode %{
10164 __ eincl($dst$$Register, $src$$Register, false);
10165 %}
10166 ins_pipe(ialu_reg);
10167 %}
10168
10169 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10170 %{
10171 predicate(UseAPX && UseIncDec);
10172 match(Set dst (AddI (LoadI src) val));
10173 effect(KILL cr);
10174
10175 format %{ "eincl $dst, $src\t# int ndd" %}
10176 ins_encode %{
10177 __ eincl($dst$$Register, $src$$Address, false);
10178 %}
10179 ins_pipe(ialu_reg);
10180 %}
10181
10182 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10183 %{
10184 predicate(UseIncDec);
10185 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10186 effect(KILL cr);
10187
10188 ins_cost(125); // XXX
10189 format %{ "incl $dst\t# int" %}
10190 ins_encode %{
10191 __ incrementl($dst$$Address);
10192 %}
10193 ins_pipe(ialu_mem_imm);
10194 %}
10195
10196 // XXX why does this use AddI
10197 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10198 %{
10199 predicate(!UseAPX && UseIncDec);
10200 match(Set dst (AddI dst src));
10201 effect(KILL cr);
10202
10203 format %{ "decl $dst\t# int" %}
10204 ins_encode %{
10205 __ decrementl($dst$$Register);
10206 %}
10207 ins_pipe(ialu_reg);
10208 %}
10209
10210 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10211 %{
10212 predicate(UseAPX && UseIncDec);
10213 match(Set dst (AddI src val));
10214 effect(KILL cr);
10215 flag(PD::Flag_ndd_demotable_opr1);
10216
10217 format %{ "edecl $dst, $src\t# int ndd" %}
10218 ins_encode %{
10219 __ edecl($dst$$Register, $src$$Register, false);
10220 %}
10221 ins_pipe(ialu_reg);
10222 %}
10223
10224 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10225 %{
10226 predicate(UseAPX && UseIncDec);
10227 match(Set dst (AddI (LoadI src) val));
10228 effect(KILL cr);
10229
10230 format %{ "edecl $dst, $src\t# int ndd" %}
10231 ins_encode %{
10232 __ edecl($dst$$Register, $src$$Address, false);
10233 %}
10234 ins_pipe(ialu_reg);
10235 %}
10236
10237 // XXX why does this use AddI
10238 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10239 %{
10240 predicate(UseIncDec);
10241 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10242 effect(KILL cr);
10243
10244 ins_cost(125); // XXX
10245 format %{ "decl $dst\t# int" %}
10246 ins_encode %{
10247 __ decrementl($dst$$Address);
10248 %}
10249 ins_pipe(ialu_mem_imm);
10250 %}
10251
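// lea computes base + index*scale + disp in one instruction and, unlike add,
// does not write the flags register - hence no KILL cr effect below. On CPUs
// where lea is fast (see the supports_fast_2op_lea/supports_fast_3op_lea
// predicates) this yields non-destructive two- and three-operand adds.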
10252 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10253 %{
10254 predicate(VM_Version::supports_fast_2op_lea());
10255 match(Set dst (AddI (LShiftI index scale) disp));
10256
10257 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10258 ins_encode %{
10259 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10260 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10261 %}
10262 ins_pipe(ialu_reg_reg);
10263 %}
10264
10265 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10266 %{
10267 predicate(VM_Version::supports_fast_3op_lea());
10268 match(Set dst (AddI (AddI base index) disp));
10269
10270 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10271 ins_encode %{
10272 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10273 %}
10274 ins_pipe(ialu_reg_reg);
10275 %}
10276
10277 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10278 %{
10279 predicate(VM_Version::supports_fast_2op_lea());
10280 match(Set dst (AddI base (LShiftI index scale)));
10281
10282 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10283 ins_encode %{
10284 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10285 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10286 %}
10287 ins_pipe(ialu_reg_reg);
10288 %}
10289
10290 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10291 %{
10292 predicate(VM_Version::supports_fast_3op_lea());
10293 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10294
10295 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10296 ins_encode %{
10297 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10298 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10299 %}
10300 ins_pipe(ialu_reg_reg);
10301 %}
10302
10303 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10304 %{
10305 predicate(!UseAPX);
10306 match(Set dst (AddL dst src));
10307 effect(KILL cr);
10308 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10309
10310 format %{ "addq $dst, $src\t# long" %}
10311 ins_encode %{
10312 __ addq($dst$$Register, $src$$Register);
10313 %}
10314 ins_pipe(ialu_reg_reg);
10315 %}
10316
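// APX NDD (new data destination) forms take a separate destination, so the
// result does not have to clobber a source. Informally, for the rule below:
//
//   eaddq dst, src1, src2   =>   dst = src1 + src2   (src1/src2 preserved)
//
// The Flag_ndd_demotable_opr* hints mark operands that may share a register
// with dst, in which case the encoding can be demoted to the shorter legacy
// two-operand form.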
10317 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10318 %{
10319 predicate(UseAPX);
10320 match(Set dst (AddL src1 src2));
10321 effect(KILL cr);
10322 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10323
10324 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10325 ins_encode %{
10326 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10327 %}
10328 ins_pipe(ialu_reg_reg);
10329 %}
10330
10331 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10332 %{
10333 predicate(!UseAPX);
10334 match(Set dst (AddL dst src));
10335 effect(KILL cr);
10336 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10337
10338 format %{ "addq $dst, $src\t# long" %}
10339 ins_encode %{
10340 __ addq($dst$$Register, $src$$constant);
10341 %}
10342 ins_pipe( ialu_reg );
10343 %}
10344
10345 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10346 %{
10347 predicate(UseAPX);
10348 match(Set dst (AddL src1 src2));
10349 effect(KILL cr);
10350 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10351
10352 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10353 ins_encode %{
10354 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10355 %}
10356 ins_pipe( ialu_reg );
10357 %}
10358
10359 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10360 %{
10361 predicate(UseAPX);
10362 match(Set dst (AddL (LoadL src1) src2));
10363 effect(KILL cr);
10364 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10365
10366 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10367 ins_encode %{
10368 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10369 %}
10370 ins_pipe( ialu_reg );
10371 %}
10372
10373 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10374 %{
10375 predicate(!UseAPX);
10376 match(Set dst (AddL dst (LoadL src)));
10377 effect(KILL cr);
10378 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10379
10380 ins_cost(150); // XXX
10381 format %{ "addq $dst, $src\t# long" %}
10382 ins_encode %{
10383 __ addq($dst$$Register, $src$$Address);
10384 %}
10385 ins_pipe(ialu_reg_mem);
10386 %}
10387
10388 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10389 %{
10390 predicate(UseAPX);
10391 match(Set dst (AddL src1 (LoadL src2)));
10392 effect(KILL cr);
10393 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10394
10395 ins_cost(150);
10396 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10397 ins_encode %{
10398 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10399 %}
10400 ins_pipe(ialu_reg_mem);
10401 %}
10402
10403 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10404 %{
10405 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10406 effect(KILL cr);
10407 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10408
10409 ins_cost(150); // XXX
10410 format %{ "addq $dst, $src\t# long" %}
10411 ins_encode %{
10412 __ addq($dst$$Address, $src$$Register);
10413 %}
10414 ins_pipe(ialu_mem_reg);
10415 %}
10416
10417 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10418 %{
10419 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10420 effect(KILL cr);
10421 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10422
10423 ins_cost(125); // XXX
10424 format %{ "addq $dst, $src\t# long" %}
10425 ins_encode %{
10426 __ addq($dst$$Address, $src$$constant);
10427 %}
10428 ins_pipe(ialu_mem_imm);
10429 %}
10430
10431 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10432 %{
10433 predicate(!UseAPX && UseIncDec);
10434 match(Set dst (AddL dst src));
10435 effect(KILL cr);
10436
10437 format %{ "incq $dst\t# long" %}
10438 ins_encode %{
10439 __ incrementq($dst$$Register);
10440 %}
10441 ins_pipe(ialu_reg);
10442 %}
10443
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10445 %{
10446 predicate(UseAPX && UseIncDec);
10447 match(Set dst (AddL src val));
10448 effect(KILL cr);
10449 flag(PD::Flag_ndd_demotable_opr1);
10450
10451 format %{ "eincq $dst, $src\t# long ndd" %}
10452 ins_encode %{
10453 __ eincq($dst$$Register, $src$$Register, false);
10454 %}
10455 ins_pipe(ialu_reg);
10456 %}
10457
10458 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10459 %{
10460 predicate(UseAPX && UseIncDec);
10461 match(Set dst (AddL (LoadL src) val));
10462 effect(KILL cr);
10463
10464 format %{ "eincq $dst, $src\t# long ndd" %}
10465 ins_encode %{
10466 __ eincq($dst$$Register, $src$$Address, false);
10467 %}
10468 ins_pipe(ialu_reg);
10469 %}
10470
10471 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10472 %{
10473 predicate(UseIncDec);
10474 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10475 effect(KILL cr);
10476
10477 ins_cost(125); // XXX
10478 format %{ "incq $dst\t# long" %}
10479 ins_encode %{
10480 __ incrementq($dst$$Address);
10481 %}
10482 ins_pipe(ialu_mem_imm);
10483 %}
10484
// Decrement is matched as AddL of -1; see the note on the int form above.
10486 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10487 %{
10488 predicate(!UseAPX && UseIncDec);
10489 match(Set dst (AddL dst src));
10490 effect(KILL cr);
10491
10492 format %{ "decq $dst\t# long" %}
10493 ins_encode %{
10494 __ decrementq($dst$$Register);
10495 %}
10496 ins_pipe(ialu_reg);
10497 %}
10498
10499 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10500 %{
10501 predicate(UseAPX && UseIncDec);
10502 match(Set dst (AddL src val));
10503 effect(KILL cr);
10504 flag(PD::Flag_ndd_demotable_opr1);
10505
10506 format %{ "edecq $dst, $src\t# long ndd" %}
10507 ins_encode %{
10508 __ edecq($dst$$Register, $src$$Register, false);
10509 %}
10510 ins_pipe(ialu_reg);
10511 %}
10512
10513 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10514 %{
10515 predicate(UseAPX && UseIncDec);
10516 match(Set dst (AddL (LoadL src) val));
10517 effect(KILL cr);
10518
10519 format %{ "edecq $dst, $src\t# long ndd" %}
10520 ins_encode %{
10521 __ edecq($dst$$Register, $src$$Address, false);
10522 %}
10523 ins_pipe(ialu_reg);
10524 %}
10525
// As above: decrement is matched as AddL of -1, not SubL.
10527 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10528 %{
10529 predicate(UseIncDec);
10530 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10531 effect(KILL cr);
10532
10533 ins_cost(125); // XXX
10534 format %{ "decq $dst\t# long" %}
10535 ins_encode %{
10536 __ decrementq($dst$$Address);
10537 %}
10538 ins_pipe(ialu_mem_imm);
10539 %}
10540
10541 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10542 %{
10543 predicate(VM_Version::supports_fast_2op_lea());
10544 match(Set dst (AddL (LShiftL index scale) disp));
10545
10546 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10547 ins_encode %{
10548 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10549 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10550 %}
10551 ins_pipe(ialu_reg_reg);
10552 %}
10553
10554 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10555 %{
10556 predicate(VM_Version::supports_fast_3op_lea());
10557 match(Set dst (AddL (AddL base index) disp));
10558
10559 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10560 ins_encode %{
10561 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10562 %}
10563 ins_pipe(ialu_reg_reg);
10564 %}
10565
10566 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10567 %{
10568 predicate(VM_Version::supports_fast_2op_lea());
10569 match(Set dst (AddL base (LShiftL index scale)));
10570
10571 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10572 ins_encode %{
10573 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10574 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10575 %}
10576 ins_pipe(ialu_reg_reg);
10577 %}
10578
10579 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10580 %{
10581 predicate(VM_Version::supports_fast_3op_lea());
10582 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10583
10584 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10585 ins_encode %{
10586 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10587 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10588 %}
10589 ins_pipe(ialu_reg_reg);
10590 %}
10591
10592 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10593 %{
10594 match(Set dst (AddP dst src));
10595 effect(KILL cr);
10596 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10597
10598 format %{ "addq $dst, $src\t# ptr" %}
10599 ins_encode %{
10600 __ addq($dst$$Register, $src$$Register);
10601 %}
10602 ins_pipe(ialu_reg_reg);
10603 %}
10604
10605 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10606 %{
10607 match(Set dst (AddP dst src));
10608 effect(KILL cr);
10609 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10610
10611 format %{ "addq $dst, $src\t# ptr" %}
10612 ins_encode %{
10613 __ addq($dst$$Register, $src$$constant);
10614 %}
10615 ins_pipe( ialu_reg );
10616 %}
10617
// XXX should there be memory forms of addP?
10619
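// The cast rules below carry sharpened type information for the optimizer
// only; they select to empty, zero-size encodings. The VerifyConstraintCasts
// forms are the debug-time exception and emit a real range check.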
10620 instruct checkCastPP(rRegP dst)
10621 %{
10622 match(Set dst (CheckCastPP dst));
10623
10624 size(0);
10625 format %{ "# checkcastPP of $dst" %}
10626 ins_encode(/* empty encoding */);
10627 ins_pipe(empty);
10628 %}
10629
10630 instruct castPP(rRegP dst)
10631 %{
10632 match(Set dst (CastPP dst));
10633
10634 size(0);
10635 format %{ "# castPP of $dst" %}
10636 ins_encode(/* empty encoding */);
10637 ins_pipe(empty);
10638 %}
10639
10640 instruct castII(rRegI dst)
10641 %{
10642 predicate(VerifyConstraintCasts == 0);
10643 match(Set dst (CastII dst));
10644
10645 size(0);
10646 format %{ "# castII of $dst" %}
10647 ins_encode(/* empty encoding */);
10648 ins_cost(0);
10649 ins_pipe(empty);
10650 %}
10651
10652 instruct castII_checked(rRegI dst, rFlagsReg cr)
10653 %{
10654 predicate(VerifyConstraintCasts > 0);
10655 match(Set dst (CastII dst));
10656
10657 effect(KILL cr);
10658 format %{ "# cast_checked_II $dst" %}
10659 ins_encode %{
10660 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10661 %}
10662 ins_pipe(pipe_slow);
10663 %}
10664
10665 instruct castLL(rRegL dst)
10666 %{
10667 predicate(VerifyConstraintCasts == 0);
10668 match(Set dst (CastLL dst));
10669
10670 size(0);
10671 format %{ "# castLL of $dst" %}
10672 ins_encode(/* empty encoding */);
10673 ins_cost(0);
10674 ins_pipe(empty);
10675 %}
10676
10677 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10678 %{
10679 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10680 match(Set dst (CastLL dst));
10681
10682 effect(KILL cr);
10683 format %{ "# cast_checked_LL $dst" %}
10684 ins_encode %{
10685 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10686 %}
10687 ins_pipe(pipe_slow);
10688 %}
10689
10690 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10691 %{
10692 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10693 match(Set dst (CastLL dst));
10694
10695 effect(KILL cr, TEMP tmp);
10696 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10697 ins_encode %{
10698 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10699 %}
10700 ins_pipe(pipe_slow);
10701 %}
10702
10703 instruct castFF(regF dst)
10704 %{
10705 match(Set dst (CastFF dst));
10706
10707 size(0);
10708 format %{ "# castFF of $dst" %}
10709 ins_encode(/* empty encoding */);
10710 ins_cost(0);
10711 ins_pipe(empty);
10712 %}
10713
10714 instruct castHH(regF dst)
10715 %{
10716 match(Set dst (CastHH dst));
10717
10718 size(0);
10719 format %{ "# castHH of $dst" %}
10720 ins_encode(/* empty encoding */);
10721 ins_cost(0);
10722 ins_pipe(empty);
10723 %}
10724
10725 instruct castDD(regD dst)
10726 %{
10727 match(Set dst (CastDD dst));
10728
10729 size(0);
10730 format %{ "# castDD of $dst" %}
10731 ins_encode(/* empty encoding */);
10732 ins_cost(0);
10733 ins_pipe(empty);
10734 %}
10735
10736 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
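// All of the compare-and-swap rules below emit LOCK CMPXCHG. Informally,
// for the pointer-sized case:
//
//   atomically {
//     if (*mem_ptr == rax) { *mem_ptr = newval; ZF = 1; }
//     else                 { rax = *mem_ptr;    ZF = 0; }
//   }
//
// which is why oldval is pinned to rax (and KILLed) and the boolean result
// is materialized from ZF via setcc.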
10737 instruct compareAndSwapP(rRegI res,
10738 memory mem_ptr,
10739 rax_RegP oldval, rRegP newval,
10740 rFlagsReg cr)
10741 %{
10742 predicate(n->as_LoadStore()->barrier_data() == 0);
10743 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10744 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10745 effect(KILL cr, KILL oldval);
10746
10747 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10748 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10749 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10750 ins_encode %{
10751 __ lock();
10752 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10753 __ setcc(Assembler::equal, $res$$Register);
10754 %}
10755 ins_pipe( pipe_cmpxchg );
10756 %}
10757
10758 instruct compareAndSwapL(rRegI res,
10759 memory mem_ptr,
10760 rax_RegL oldval, rRegL newval,
10761 rFlagsReg cr)
10762 %{
10763 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10764 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10765 effect(KILL cr, KILL oldval);
10766
10767 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10768 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10769 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10770 ins_encode %{
10771 __ lock();
10772 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10773 __ setcc(Assembler::equal, $res$$Register);
10774 %}
10775 ins_pipe( pipe_cmpxchg );
10776 %}
10777
10778 instruct compareAndSwapI(rRegI res,
10779 memory mem_ptr,
10780 rax_RegI oldval, rRegI newval,
10781 rFlagsReg cr)
10782 %{
10783 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10784 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10785 effect(KILL cr, KILL oldval);
10786
10787 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10788 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10789 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10790 ins_encode %{
10791 __ lock();
10792 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10793 __ setcc(Assembler::equal, $res$$Register);
10794 %}
10795 ins_pipe( pipe_cmpxchg );
10796 %}
10797
10798 instruct compareAndSwapB(rRegI res,
10799 memory mem_ptr,
10800 rax_RegI oldval, rRegI newval,
10801 rFlagsReg cr)
10802 %{
10803 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10804 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10805 effect(KILL cr, KILL oldval);
10806
10807 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10808 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10809 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10810 ins_encode %{
10811 __ lock();
10812 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10813 __ setcc(Assembler::equal, $res$$Register);
10814 %}
10815 ins_pipe( pipe_cmpxchg );
10816 %}
10817
10818 instruct compareAndSwapS(rRegI res,
10819 memory mem_ptr,
10820 rax_RegI oldval, rRegI newval,
10821 rFlagsReg cr)
10822 %{
10823 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10824 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10825 effect(KILL cr, KILL oldval);
10826
10827 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10828 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10829 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10830 ins_encode %{
10831 __ lock();
10832 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10833 __ setcc(Assembler::equal, $res$$Register);
10834 %}
10835 ins_pipe( pipe_cmpxchg );
10836 %}
10837
10838 instruct compareAndSwapN(rRegI res,
10839 memory mem_ptr,
10840 rax_RegN oldval, rRegN newval,
10841 rFlagsReg cr) %{
10842 predicate(n->as_LoadStore()->barrier_data() == 0);
10843 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10844 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10845 effect(KILL cr, KILL oldval);
10846
10847 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10848 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10849 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10850 ins_encode %{
10851 __ lock();
10852 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10853 __ setcc(Assembler::equal, $res$$Register);
10854 %}
10855 ins_pipe( pipe_cmpxchg );
10856 %}
10857
10858 instruct compareAndExchangeB(
10859 memory mem_ptr,
10860 rax_RegI oldval, rRegI newval,
10861 rFlagsReg cr)
10862 %{
10863 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10864 effect(KILL cr);
10865
10866 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10867 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10868 ins_encode %{
10869 __ lock();
10870 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10871 %}
10872 ins_pipe( pipe_cmpxchg );
10873 %}
10874
10875 instruct compareAndExchangeS(
10876 memory mem_ptr,
10877 rax_RegI oldval, rRegI newval,
10878 rFlagsReg cr)
10879 %{
10880 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10881 effect(KILL cr);
10882
10883 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10884 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10885 ins_encode %{
10886 __ lock();
10887 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10888 %}
10889 ins_pipe( pipe_cmpxchg );
10890 %}
10891
10892 instruct compareAndExchangeI(
10893 memory mem_ptr,
10894 rax_RegI oldval, rRegI newval,
10895 rFlagsReg cr)
10896 %{
10897 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10898 effect(KILL cr);
10899
10900 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10901 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10902 ins_encode %{
10903 __ lock();
10904 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10905 %}
10906 ins_pipe( pipe_cmpxchg );
10907 %}
10908
10909 instruct compareAndExchangeL(
10910 memory mem_ptr,
10911 rax_RegL oldval, rRegL newval,
10912 rFlagsReg cr)
10913 %{
10914 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10915 effect(KILL cr);
10916
10917 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10918 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10919 ins_encode %{
10920 __ lock();
10921 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10922 %}
10923 ins_pipe( pipe_cmpxchg );
10924 %}
10925
10926 instruct compareAndExchangeN(
10927 memory mem_ptr,
10928 rax_RegN oldval, rRegN newval,
10929 rFlagsReg cr) %{
10930 predicate(n->as_LoadStore()->barrier_data() == 0);
10931 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10932 effect(KILL cr);
10933
10934 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10935 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10936 ins_encode %{
10937 __ lock();
10938 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10939 %}
10940 ins_pipe( pipe_cmpxchg );
10941 %}
10942
10943 instruct compareAndExchangeP(
10944 memory mem_ptr,
10945 rax_RegP oldval, rRegP newval,
10946 rFlagsReg cr)
10947 %{
10948 predicate(n->as_LoadStore()->barrier_data() == 0);
10949 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10950 effect(KILL cr);
10951
10952 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10953 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10954 ins_encode %{
10955 __ lock();
10956 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10957 %}
10958 ins_pipe( pipe_cmpxchg );
10959 %}
10960
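// GetAndAdd: when the old value is unused, a plain locked add suffices;
// otherwise LOCK XADD is emitted, which informally performs
//
//   atomically { tmp = *mem; *mem = tmp + newval; newval = tmp; }
//
// For the sub-word forms the returned old value is then re-narrowed to the
// element type with narrow_subword_type.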
10961 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10962 predicate(n->as_LoadStore()->result_not_used());
10963 match(Set dummy (GetAndAddB mem add));
10964 effect(KILL cr);
10965 format %{ "addb_lock $mem, $add" %}
10966 ins_encode %{
10967 __ lock();
10968 __ addb($mem$$Address, $add$$Register);
10969 %}
10970 ins_pipe(pipe_cmpxchg);
10971 %}
10972
10973 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10974 predicate(n->as_LoadStore()->result_not_used());
10975 match(Set dummy (GetAndAddB mem add));
10976 effect(KILL cr);
10977 format %{ "addb_lock $mem, $add" %}
10978 ins_encode %{
10979 __ lock();
10980 __ addb($mem$$Address, $add$$constant);
10981 %}
10982 ins_pipe(pipe_cmpxchg);
10983 %}
10984
10985 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10986 predicate(!n->as_LoadStore()->result_not_used());
10987 match(Set newval (GetAndAddB mem newval));
10988 effect(KILL cr);
10989 format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
10990 ins_encode %{
10991 __ lock();
10992 __ xaddb($mem$$Address, $newval$$Register);
10993 __ narrow_subword_type($newval$$Register, T_BYTE);
10994 %}
10995 ins_pipe(pipe_cmpxchg);
10996 %}
10997
10998 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10999 predicate(n->as_LoadStore()->result_not_used());
11000 match(Set dummy (GetAndAddS mem add));
11001 effect(KILL cr);
11002 format %{ "addw_lock $mem, $add" %}
11003 ins_encode %{
11004 __ lock();
11005 __ addw($mem$$Address, $add$$Register);
11006 %}
11007 ins_pipe(pipe_cmpxchg);
11008 %}
11009
11010 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11011 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
11012 match(Set dummy (GetAndAddS mem add));
11013 effect(KILL cr);
11014 format %{ "addw_lock $mem, $add" %}
11015 ins_encode %{
11016 __ lock();
11017 __ addw($mem$$Address, $add$$constant);
11018 %}
11019 ins_pipe(pipe_cmpxchg);
11020 %}
11021
11022 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
11023 predicate(!n->as_LoadStore()->result_not_used());
11024 match(Set newval (GetAndAddS mem newval));
11025 effect(KILL cr);
11026 format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
11027 ins_encode %{
11028 __ lock();
11029 __ xaddw($mem$$Address, $newval$$Register);
11030 __ narrow_subword_type($newval$$Register, T_SHORT);
11031 %}
11032 ins_pipe(pipe_cmpxchg);
11033 %}
11034
11035 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
11036 predicate(n->as_LoadStore()->result_not_used());
11037 match(Set dummy (GetAndAddI mem add));
11038 effect(KILL cr);
11039 format %{ "addl_lock $mem, $add" %}
11040 ins_encode %{
11041 __ lock();
11042 __ addl($mem$$Address, $add$$Register);
11043 %}
11044 ins_pipe(pipe_cmpxchg);
11045 %}
11046
11047 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
11048 predicate(n->as_LoadStore()->result_not_used());
11049 match(Set dummy (GetAndAddI mem add));
11050 effect(KILL cr);
11051 format %{ "addl_lock $mem, $add" %}
11052 ins_encode %{
11053 __ lock();
11054 __ addl($mem$$Address, $add$$constant);
11055 %}
11056 ins_pipe(pipe_cmpxchg);
11057 %}
11058
11059 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
11060 predicate(!n->as_LoadStore()->result_not_used());
11061 match(Set newval (GetAndAddI mem newval));
11062 effect(KILL cr);
11063 format %{ "xaddl_lock $mem, $newval" %}
11064 ins_encode %{
11065 __ lock();
11066 __ xaddl($mem$$Address, $newval$$Register);
11067 %}
11068 ins_pipe(pipe_cmpxchg);
11069 %}
11070
11071 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
11072 predicate(n->as_LoadStore()->result_not_used());
11073 match(Set dummy (GetAndAddL mem add));
11074 effect(KILL cr);
11075 format %{ "addq_lock $mem, $add" %}
11076 ins_encode %{
11077 __ lock();
11078 __ addq($mem$$Address, $add$$Register);
11079 %}
11080 ins_pipe(pipe_cmpxchg);
11081 %}
11082
11083 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11084 predicate(n->as_LoadStore()->result_not_used());
11085 match(Set dummy (GetAndAddL mem add));
11086 effect(KILL cr);
11087 format %{ "addq_lock $mem, $add" %}
11088 ins_encode %{
11089 __ lock();
11090 __ addq($mem$$Address, $add$$constant);
11091 %}
11092 ins_pipe(pipe_cmpxchg);
11093 %}
11094
11095 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11096 predicate(!n->as_LoadStore()->result_not_used());
11097 match(Set newval (GetAndAddL mem newval));
11098 effect(KILL cr);
11099 format %{ "xaddq_lock $mem, $newval" %}
11100 ins_encode %{
11101 __ lock();
11102 __ xaddq($mem$$Address, $newval$$Register);
11103 %}
11104 ins_pipe(pipe_cmpxchg);
11105 %}
11106
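// GetAndSet: XCHG with a memory operand is implicitly locked by the
// hardware, so no lock() prefix is needed and no flags are killed:
//
//   atomically { tmp = *mem; *mem = newval; newval = tmp; }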
11107 instruct xchgB( memory mem, rRegI newval) %{
11108 match(Set newval (GetAndSetB mem newval));
11109 format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
11110 ins_encode %{
11111 __ xchgb($newval$$Register, $mem$$Address);
11112 __ narrow_subword_type($newval$$Register, T_BYTE);
11113 %}
11114 ins_pipe( pipe_cmpxchg );
11115 %}
11116
11117 instruct xchgS( memory mem, rRegI newval) %{
11118 match(Set newval (GetAndSetS mem newval));
11119 format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
11120 ins_encode %{
11121 __ xchgw($newval$$Register, $mem$$Address);
11122 __ narrow_subword_type($newval$$Register, T_SHORT);
11123 %}
11124 ins_pipe( pipe_cmpxchg );
11125 %}
11126
11127 instruct xchgI( memory mem, rRegI newval) %{
11128 match(Set newval (GetAndSetI mem newval));
11129 format %{ "XCHGL $newval,[$mem]" %}
11130 ins_encode %{
11131 __ xchgl($newval$$Register, $mem$$Address);
11132 %}
11133 ins_pipe( pipe_cmpxchg );
11134 %}
11135
11136 instruct xchgL( memory mem, rRegL newval) %{
11137 match(Set newval (GetAndSetL mem newval));
11138 format %{ "XCHGL $newval,[$mem]" %}
11139 ins_encode %{
11140 __ xchgq($newval$$Register, $mem$$Address);
11141 %}
11142 ins_pipe( pipe_cmpxchg );
11143 %}
11144
11145 instruct xchgP( memory mem, rRegP newval) %{
11146 match(Set newval (GetAndSetP mem newval));
11147 predicate(n->as_LoadStore()->barrier_data() == 0);
11148 format %{ "XCHGQ $newval,[$mem]" %}
11149 ins_encode %{
11150 __ xchgq($newval$$Register, $mem$$Address);
11151 %}
11152 ins_pipe( pipe_cmpxchg );
11153 %}
11154
11155 instruct xchgN( memory mem, rRegN newval) %{
11156 predicate(n->as_LoadStore()->barrier_data() == 0);
11157 match(Set newval (GetAndSetN mem newval));
11158 format %{ "XCHGL $newval,$mem]" %}
11159 ins_encode %{
11160 __ xchgl($newval$$Register, $mem$$Address);
11161 %}
11162 ins_pipe( pipe_cmpxchg );
11163 %}
11164
//----------Abs Instructions----------------------------------------------------
11166
11167 // Integer Absolute Instructions
11168 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11169 %{
11170 match(Set dst (AbsI src));
11171 effect(TEMP dst, KILL cr);
11172 format %{ "xorl $dst, $dst\t# abs int\n\t"
11173 "subl $dst, $src\n\t"
11174 "cmovll $dst, $src" %}
11175 ins_encode %{
11176 __ xorl($dst$$Register, $dst$$Register);
11177 __ subl($dst$$Register, $src$$Register);
11178 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11179 %}
11180
11181 ins_pipe(ialu_reg_reg);
11182 %}
11183
11184 // Long Absolute Instructions
11185 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11186 %{
11187 match(Set dst (AbsL src));
11188 effect(TEMP dst, KILL cr);
11189 format %{ "xorl $dst, $dst\t# abs long\n\t"
11190 "subq $dst, $src\n\t"
11191 "cmovlq $dst, $src" %}
11192 ins_encode %{
11193 __ xorl($dst$$Register, $dst$$Register);
11194 __ subq($dst$$Register, $src$$Register);
11195 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11196 %}
11197
11198 ins_pipe(ialu_reg_reg);
11199 %}
11200
11201 //----------Subtraction Instructions-------------------------------------------
11202
11203 // Integer Subtraction Instructions
11204 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11205 %{
11206 predicate(!UseAPX);
11207 match(Set dst (SubI dst src));
11208 effect(KILL cr);
11209 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11210
11211 format %{ "subl $dst, $src\t# int" %}
11212 ins_encode %{
11213 __ subl($dst$$Register, $src$$Register);
11214 %}
11215 ins_pipe(ialu_reg_reg);
11216 %}
11217
11218 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11219 %{
11220 predicate(UseAPX);
11221 match(Set dst (SubI src1 src2));
11222 effect(KILL cr);
11223 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11224
11225 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11226 ins_encode %{
11227 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11228 %}
11229 ins_pipe(ialu_reg_reg);
11230 %}
11231
11232 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11233 %{
11234 predicate(UseAPX);
11235 match(Set dst (SubI src1 src2));
11236 effect(KILL cr);
11237 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11238
11239 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11240 ins_encode %{
11241 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11242 %}
11243 ins_pipe(ialu_reg_reg);
11244 %}
11245
11246 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11247 %{
11248 predicate(UseAPX);
11249 match(Set dst (SubI (LoadI src1) src2));
11250 effect(KILL cr);
11251 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11252
11253 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11254 ins_encode %{
11255 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11256 %}
11257 ins_pipe(ialu_reg_reg);
11258 %}
11259
11260 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11261 %{
11262 predicate(!UseAPX);
11263 match(Set dst (SubI dst (LoadI src)));
11264 effect(KILL cr);
11265 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11266
11267 ins_cost(150);
11268 format %{ "subl $dst, $src\t# int" %}
11269 ins_encode %{
11270 __ subl($dst$$Register, $src$$Address);
11271 %}
11272 ins_pipe(ialu_reg_mem);
11273 %}
11274
11275 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11276 %{
11277 predicate(UseAPX);
11278 match(Set dst (SubI src1 (LoadI src2)));
11279 effect(KILL cr);
11280 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11281
11282 ins_cost(150);
11283 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11284 ins_encode %{
11285 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11286 %}
11287 ins_pipe(ialu_reg_mem);
11288 %}
11289
11290 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11291 %{
11292 predicate(UseAPX);
11293 match(Set dst (SubI (LoadI src1) src2));
11294 effect(KILL cr);
11295 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11296
11297 ins_cost(150);
11298 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11299 ins_encode %{
11300 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11301 %}
11302 ins_pipe(ialu_reg_mem);
11303 %}
11304
11305 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11306 %{
11307 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11308 effect(KILL cr);
11309 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11310
11311 ins_cost(150);
11312 format %{ "subl $dst, $src\t# int" %}
11313 ins_encode %{
11314 __ subl($dst$$Address, $src$$Register);
11315 %}
11316 ins_pipe(ialu_mem_reg);
11317 %}
11318
11319 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11320 %{
11321 predicate(!UseAPX);
11322 match(Set dst (SubL dst src));
11323 effect(KILL cr);
11324 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11325
11326 format %{ "subq $dst, $src\t# long" %}
11327 ins_encode %{
11328 __ subq($dst$$Register, $src$$Register);
11329 %}
11330 ins_pipe(ialu_reg_reg);
11331 %}
11332
11333 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11334 %{
11335 predicate(UseAPX);
11336 match(Set dst (SubL src1 src2));
11337 effect(KILL cr);
11338 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11339
11340 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11341 ins_encode %{
11342 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11343 %}
11344 ins_pipe(ialu_reg_reg);
11345 %}
11346
11347 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11348 %{
11349 predicate(UseAPX);
11350 match(Set dst (SubL src1 src2));
11351 effect(KILL cr);
11352 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11353
11354 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11355 ins_encode %{
11356 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11357 %}
11358 ins_pipe(ialu_reg_reg);
11359 %}
11360
11361 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11362 %{
11363 predicate(UseAPX);
11364 match(Set dst (SubL (LoadL src1) src2));
11365 effect(KILL cr);
11366 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11367
11368 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11369 ins_encode %{
11370 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11371 %}
11372 ins_pipe(ialu_reg_reg);
11373 %}
11374
11375 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11376 %{
11377 predicate(!UseAPX);
11378 match(Set dst (SubL dst (LoadL src)));
11379 effect(KILL cr);
11380 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11381
11382 ins_cost(150);
11383 format %{ "subq $dst, $src\t# long" %}
11384 ins_encode %{
11385 __ subq($dst$$Register, $src$$Address);
11386 %}
11387 ins_pipe(ialu_reg_mem);
11388 %}
11389
11390 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11391 %{
11392 predicate(UseAPX);
11393 match(Set dst (SubL src1 (LoadL src2)));
11394 effect(KILL cr);
11395 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11396
11397 ins_cost(150);
11398 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11399 ins_encode %{
11400 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11401 %}
11402 ins_pipe(ialu_reg_mem);
11403 %}
11404
11405 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11406 %{
11407 predicate(UseAPX);
11408 match(Set dst (SubL (LoadL src1) src2));
11409 effect(KILL cr);
11410 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11411
11412 ins_cost(150);
11413 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11414 ins_encode %{
11415 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11416 %}
11417 ins_pipe(ialu_reg_mem);
11418 %}
11419
11420 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11421 %{
11422 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11423 effect(KILL cr);
11424 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11425
11426 ins_cost(150);
11427 format %{ "subq $dst, $src\t# long" %}
11428 ins_encode %{
11429 __ subq($dst$$Address, $src$$Register);
11430 %}
11431 ins_pipe(ialu_mem_reg);
11432 %}
11433
// Subtract an int from a pointer
// The ideal graph has no pointer subtraction; "ptr - int" reaches the
// matcher as (AddP ptr (SubI 0 int)), which is the shape matched below.
11436 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11437 %{
11438 match(Set dst (AddP dst (SubI zero src)));
11439 effect(KILL cr);
11440
11441 format %{ "subq $dst, $src\t# ptr - int" %}
11442 ins_encode %{
11443 __ subq($dst$$Register, $src$$Register);
11444 %}
11445 ins_pipe(ialu_reg_reg);
11446 %}
11447
11448 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11449 %{
11450 predicate(!UseAPX);
11451 match(Set dst (SubI zero dst));
11452 effect(KILL cr);
11453 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11454
11455 format %{ "negl $dst\t# int" %}
11456 ins_encode %{
11457 __ negl($dst$$Register);
11458 %}
11459 ins_pipe(ialu_reg);
11460 %}
11461
11462 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11463 %{
11464 predicate(UseAPX);
11465 match(Set dst (SubI zero src));
11466 effect(KILL cr);
11467 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11468
11469 format %{ "enegl $dst, $src\t# int ndd" %}
11470 ins_encode %{
11471 __ enegl($dst$$Register, $src$$Register, false);
11472 %}
11473 ins_pipe(ialu_reg);
11474 %}
11475
11476 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11477 %{
11478 predicate(!UseAPX);
11479 match(Set dst (NegI dst));
11480 effect(KILL cr);
11481 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11482
11483 format %{ "negl $dst\t# int" %}
11484 ins_encode %{
11485 __ negl($dst$$Register);
11486 %}
11487 ins_pipe(ialu_reg);
11488 %}
11489
11490 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11491 %{
11492 predicate(UseAPX);
11493 match(Set dst (NegI src));
11494 effect(KILL cr);
11495 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11496
11497 format %{ "enegl $dst, $src\t# int ndd" %}
11498 ins_encode %{
11499 __ enegl($dst$$Register, $src$$Register, false);
11500 %}
11501 ins_pipe(ialu_reg);
11502 %}
11503
11504 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11505 %{
11506 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11507 effect(KILL cr);
11508 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11509
11510 format %{ "negl $dst\t# int" %}
11511 ins_encode %{
11512 __ negl($dst$$Address);
11513 %}
11514 ins_pipe(ialu_reg);
11515 %}
11516
11517 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11518 %{
11519 predicate(!UseAPX);
11520 match(Set dst (SubL zero dst));
11521 effect(KILL cr);
11522 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11523
11524 format %{ "negq $dst\t# long" %}
11525 ins_encode %{
11526 __ negq($dst$$Register);
11527 %}
11528 ins_pipe(ialu_reg);
11529 %}
11530
11531 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11532 %{
11533 predicate(UseAPX);
11534 match(Set dst (SubL zero src));
11535 effect(KILL cr);
11536 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11537
11538 format %{ "enegq $dst, $src\t# long ndd" %}
11539 ins_encode %{
11540 __ enegq($dst$$Register, $src$$Register, false);
11541 %}
11542 ins_pipe(ialu_reg);
11543 %}
11544
11545 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11546 %{
11547 predicate(!UseAPX);
11548 match(Set dst (NegL dst));
11549 effect(KILL cr);
11550 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11551
11552 format %{ "negq $dst\t# int" %}
11553 ins_encode %{
11554 __ negq($dst$$Register);
11555 %}
11556 ins_pipe(ialu_reg);
11557 %}
11558
11559 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11560 %{
11561 predicate(UseAPX);
11562 match(Set dst (NegL src));
11563 effect(KILL cr);
11564 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11565
11566 format %{ "enegq $dst, $src\t# long ndd" %}
11567 ins_encode %{
11568 __ enegq($dst$$Register, $src$$Register, false);
11569 %}
11570 ins_pipe(ialu_reg);
11571 %}
11572
11573 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11574 %{
11575 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11576 effect(KILL cr);
11577 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11578
11579 format %{ "negq $dst\t# long" %}
11580 ins_encode %{
11581 __ negq($dst$$Address);
11582 %}
11583 ins_pipe(ialu_reg);
11584 %}
11585
11586 //----------Multiplication/Division Instructions-------------------------------
11587 // Integer Multiplication Instructions
11588 // Multiply Register
11589
11590 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11591 %{
11592 predicate(!UseAPX);
11593 match(Set dst (MulI dst src));
11594 effect(KILL cr);
11595
11596 ins_cost(300);
11597 format %{ "imull $dst, $src\t# int" %}
11598 ins_encode %{
11599 __ imull($dst$$Register, $src$$Register);
11600 %}
11601 ins_pipe(ialu_reg_reg_alu0);
11602 %}
11603
11604 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11605 %{
11606 predicate(UseAPX);
11607 match(Set dst (MulI src1 src2));
11608 effect(KILL cr);
11609 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11610
11611 ins_cost(300);
11612 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11613 ins_encode %{
11614 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11615 %}
11616 ins_pipe(ialu_reg_reg_alu0);
11617 %}
11618
11619 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11620 %{
11621 match(Set dst (MulI src imm));
11622 effect(KILL cr);
11623
11624 ins_cost(300);
11625 format %{ "imull $dst, $src, $imm\t# int" %}
11626 ins_encode %{
11627 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11628 %}
11629 ins_pipe(ialu_reg_reg_alu0);
11630 %}
11631
11632 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11633 %{
11634 predicate(!UseAPX);
11635 match(Set dst (MulI dst (LoadI src)));
11636 effect(KILL cr);
11637
11638 ins_cost(350);
11639 format %{ "imull $dst, $src\t# int" %}
11640 ins_encode %{
11641 __ imull($dst$$Register, $src$$Address);
11642 %}
11643 ins_pipe(ialu_reg_mem_alu0);
11644 %}
11645
11646 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11647 %{
11648 predicate(UseAPX);
11649 match(Set dst (MulI src1 (LoadI src2)));
11650 effect(KILL cr);
11651 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11652
11653 ins_cost(350);
11654 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11655 ins_encode %{
11656 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11657 %}
11658 ins_pipe(ialu_reg_mem_alu0);
11659 %}
11660
11661 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11662 %{
11663 match(Set dst (MulI (LoadI src) imm));
11664 effect(KILL cr);
11665
11666 ins_cost(300);
11667 format %{ "imull $dst, $src, $imm\t# int" %}
11668 ins_encode %{
11669 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11670 %}
11671 ins_pipe(ialu_reg_mem_alu0);
11672 %}
11673
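// mulAddS2I expands into the plain int multiply and add rules above and
// informally computes
//
//   dst = dst * src1 + src2 * src3;
//
// src2 is KILLed because it holds the second product before the final add.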
11674 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11675 %{
11676 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11677 effect(KILL cr, KILL src2);
11678
11679 expand %{ mulI_rReg(dst, src1, cr);
11680 mulI_rReg(src2, src3, cr);
11681 addI_rReg(dst, src2, cr); %}
11682 %}
11683
11684 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11685 %{
11686 predicate(!UseAPX);
11687 match(Set dst (MulL dst src));
11688 effect(KILL cr);
11689
11690 ins_cost(300);
11691 format %{ "imulq $dst, $src\t# long" %}
11692 ins_encode %{
11693 __ imulq($dst$$Register, $src$$Register);
11694 %}
11695 ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697
11698 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11699 %{
11700 predicate(UseAPX);
11701 match(Set dst (MulL src1 src2));
11702 effect(KILL cr);
11703 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11704
11705 ins_cost(300);
11706 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11707 ins_encode %{
11708 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11709 %}
11710 ins_pipe(ialu_reg_reg_alu0);
11711 %}
11712
11713 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11714 %{
11715 match(Set dst (MulL src imm));
11716 effect(KILL cr);
11717
11718 ins_cost(300);
11719 format %{ "imulq $dst, $src, $imm\t# long" %}
11720 ins_encode %{
11721 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11722 %}
11723 ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725
11726 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11727 %{
11728 predicate(!UseAPX);
11729 match(Set dst (MulL dst (LoadL src)));
11730 effect(KILL cr);
11731
11732 ins_cost(350);
11733 format %{ "imulq $dst, $src\t# long" %}
11734 ins_encode %{
11735 __ imulq($dst$$Register, $src$$Address);
11736 %}
11737 ins_pipe(ialu_reg_mem_alu0);
11738 %}
11739
11740 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11741 %{
11742 predicate(UseAPX);
11743 match(Set dst (MulL src1 (LoadL src2)));
11744 effect(KILL cr);
11745 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11746
11747 ins_cost(350);
11748 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11749 ins_encode %{
11750 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11751 %}
11752 ins_pipe(ialu_reg_mem_alu0);
11753 %}
11754
11755 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11756 %{
11757 match(Set dst (MulL (LoadL src) imm));
11758 effect(KILL cr);
11759
11760 ins_cost(300);
11761 format %{ "imulq $dst, $src, $imm\t# long" %}
11762 ins_encode %{
11763 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11764 %}
11765 ins_pipe(ialu_reg_mem_alu0);
11766 %}
11767
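// The high-half multiplies use the one-operand imulq/mulq forms, which
// compute a full 128-bit product from an implicit RAX operand:
//
//   RDX:RAX = RAX * src   // dst is the RDX half
//
// hence the fixed rax/rdx operands and the USE_KILL of rax.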
11768 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11769 %{
11770 match(Set dst (MulHiL src rax));
11771 effect(USE_KILL rax, KILL cr);
11772
11773 ins_cost(300);
11774 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11775 ins_encode %{
11776 __ imulq($src$$Register);
11777 %}
11778 ins_pipe(ialu_reg_reg_alu0);
11779 %}
11780
11781 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11782 %{
11783 match(Set dst (UMulHiL src rax));
11784 effect(USE_KILL rax, KILL cr);
11785
11786 ins_cost(300);
11787 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11788 ins_encode %{
11789 __ mulq($src$$Register);
11790 %}
11791 ins_pipe(ialu_reg_reg_alu0);
11792 %}
11793
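// idivl/idivq raise #DE when the quotient overflows, and min_int / -1
// (resp. min_long / -1) is exactly that case, while Java defines the result
// as min_int with remainder 0. The cdql_enc/cdqq_enc sequences therefore
// test for it and bypass the divide, roughly:
//
//   if (rax == min_int && div == -1) { rdx = 0; /* rax keeps min_int */ }
//   else { sign-extend rax into rdx; idiv div; }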
11794 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11795 rFlagsReg cr)
11796 %{
11797 match(Set rax (DivI rax div));
11798 effect(KILL rdx, KILL cr);
11799
11800 ins_cost(30*100+10*100); // XXX
11801 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11802 "jne,s normal\n\t"
11803 "xorl rdx, rdx\n\t"
11804 "cmpl $div, -1\n\t"
11805 "je,s done\n"
11806 "normal: cdql\n\t"
11807 "idivl $div\n"
11808 "done:" %}
11809 ins_encode(cdql_enc(div));
11810 ins_pipe(ialu_reg_reg_alu0);
11811 %}
11812
11813 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11814 rFlagsReg cr)
11815 %{
11816 match(Set rax (DivL rax div));
11817 effect(KILL rdx, KILL cr);
11818
11819 ins_cost(30*100+10*100); // XXX
11820 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11821 "cmpq rax, rdx\n\t"
11822 "jne,s normal\n\t"
11823 "xorl rdx, rdx\n\t"
11824 "cmpq $div, -1\n\t"
11825 "je,s done\n"
11826 "normal: cdqq\n\t"
11827 "idivq $div\n"
11828 "done:" %}
11829 ins_encode(cdqq_enc(div));
11830 ins_pipe(ialu_reg_reg_alu0);
11831 %}
11832
11833 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11834 %{
11835 match(Set rax (UDivI rax div));
11836 effect(KILL rdx, KILL cr);
11837
11838 ins_cost(300);
11839 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11840 ins_encode %{
11841 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11842 %}
11843 ins_pipe(ialu_reg_reg_alu0);
11844 %}
11845
11846 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11847 %{
11848 match(Set rax (UDivL rax div));
11849 effect(KILL rdx, KILL cr);
11850
11851 ins_cost(300);
11852 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11853 ins_encode %{
11854 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11855 %}
11856 ins_pipe(ialu_reg_reg_alu0);
11857 %}
11858
11859 // Integer DIVMOD with Register, both quotient and mod results
11860 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11861 rFlagsReg cr)
11862 %{
11863 match(DivModI rax div);
11864 effect(KILL cr);
11865
11866 ins_cost(30*100+10*100); // XXX
11867 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11868 "jne,s normal\n\t"
11869 "xorl rdx, rdx\n\t"
11870 "cmpl $div, -1\n\t"
11871 "je,s done\n"
11872 "normal: cdql\n\t"
11873 "idivl $div\n"
11874 "done:" %}
11875 ins_encode(cdql_enc(div));
11876 ins_pipe(pipe_slow);
11877 %}
11878
11879 // Long DIVMOD with Register, both quotient and mod results
11880 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11881 rFlagsReg cr)
11882 %{
11883 match(DivModL rax div);
11884 effect(KILL cr);
11885
11886 ins_cost(30*100+10*100); // XXX
11887 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11888 "cmpq rax, rdx\n\t"
11889 "jne,s normal\n\t"
11890 "xorl rdx, rdx\n\t"
11891 "cmpq $div, -1\n\t"
11892 "je,s done\n"
11893 "normal: cdqq\n\t"
11894 "idivq $div\n"
11895 "done:" %}
11896 ins_encode(cdqq_enc(div));
11897 ins_pipe(pipe_slow);
11898 %}
11899
11900 // Unsigned integer DIVMOD with Register, both quotient and mod results
11901 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11902 no_rax_rdx_RegI div, rFlagsReg cr)
11903 %{
11904 match(UDivModI rax div);
11905 effect(TEMP tmp, KILL cr);
11906
11907 ins_cost(300);
11908 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11909 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11910 %}
11911 ins_encode %{
11912 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11913 %}
11914 ins_pipe(pipe_slow);
11915 %}
11916
11917 // Unsigned long DIVMOD with Register, both quotient and mod results
11918 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11919 no_rax_rdx_RegL div, rFlagsReg cr)
11920 %{
11921 match(UDivModL rax div);
11922 effect(TEMP tmp, KILL cr);
11923
11924 ins_cost(300);
11925 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11926 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11927 %}
11928 ins_encode %{
11929 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11930 %}
11931 ins_pipe(pipe_slow);
11932 %}
11933
11934 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11935 rFlagsReg cr)
11936 %{
11937 match(Set rdx (ModI rax div));
11938 effect(KILL rax, KILL cr);
11939
11940 ins_cost(300); // XXX
11941 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11942 "jne,s normal\n\t"
11943 "xorl rdx, rdx\n\t"
11944 "cmpl $div, -1\n\t"
11945 "je,s done\n"
11946 "normal: cdql\n\t"
11947 "idivl $div\n"
11948 "done:" %}
11949 ins_encode(cdql_enc(div));
11950 ins_pipe(ialu_reg_reg_alu0);
11951 %}
11952
11953 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11954 rFlagsReg cr)
11955 %{
11956 match(Set rdx (ModL rax div));
11957 effect(KILL rax, KILL cr);
11958
11959 ins_cost(300); // XXX
11960 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11961 "cmpq rax, rdx\n\t"
11962 "jne,s normal\n\t"
11963 "xorl rdx, rdx\n\t"
11964 "cmpq $div, -1\n\t"
11965 "je,s done\n"
11966 "normal: cdqq\n\t"
11967 "idivq $div\n"
11968 "done:" %}
11969 ins_encode(cdqq_enc(div));
11970 ins_pipe(ialu_reg_reg_alu0);
11971 %}
11972
11973 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11974 %{
11975 match(Set rdx (UModI rax div));
11976 effect(KILL rax, KILL cr);
11977
11978 ins_cost(300);
11979 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11980 ins_encode %{
11981 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11982 %}
11983 ins_pipe(ialu_reg_reg_alu0);
11984 %}
11985
11986 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11987 %{
11988 match(Set rdx (UModL rax div));
11989 effect(KILL rax, KILL cr);
11990
11991 ins_cost(300);
11992 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11993 ins_encode %{
11994 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11995 %}
11996 ins_pipe(ialu_reg_reg_alu0);
11997 %}
11998
11999 // Integer Shift Instructions
12000 // Shift Left by one, two, three
12001 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
12002 %{
12003 predicate(!UseAPX);
12004 match(Set dst (LShiftI dst shift));
12005 effect(KILL cr);
12006
12007 format %{ "sall $dst, $shift" %}
12008 ins_encode %{
12009 __ sall($dst$$Register, $shift$$constant);
12010 %}
12011 ins_pipe(ialu_reg);
12012 %}
12013
12014 // Shift Left by one, two, three
12015 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
12016 %{
12017 predicate(UseAPX);
12018 match(Set dst (LShiftI src shift));
12019 effect(KILL cr);
12020 flag(PD::Flag_ndd_demotable_opr1);
12021
12022 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
12023 ins_encode %{
12024 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12025 %}
12026 ins_pipe(ialu_reg);
12027 %}
12028
12029 // Shift Left by 8-bit immediate
12030 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12031 %{
12032 predicate(!UseAPX);
12033 match(Set dst (LShiftI dst shift));
12034 effect(KILL cr);
12035
12036 format %{ "sall $dst, $shift" %}
12037 ins_encode %{
12038 __ sall($dst$$Register, $shift$$constant);
12039 %}
12040 ins_pipe(ialu_reg);
12041 %}
12042
12043 // Shift Left by 8-bit immediate
12044 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12045 %{
12046 predicate(UseAPX);
12047 match(Set dst (LShiftI src shift));
12048 effect(KILL cr);
12049 flag(PD::Flag_ndd_demotable_opr1);
12050
12051 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12052 ins_encode %{
12053 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12054 %}
12055 ins_pipe(ialu_reg);
12056 %}
12057
12058 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12059 %{
12060 predicate(UseAPX);
12061 match(Set dst (LShiftI (LoadI src) shift));
12062 effect(KILL cr);
12063
12064 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12065 ins_encode %{
12066 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12067 %}
12068 ins_pipe(ialu_reg);
12069 %}
12070
12071 // Shift Left by 8-bit immediate
12072 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12073 %{
12074 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12075 effect(KILL cr);
12076
12077 format %{ "sall $dst, $shift" %}
12078 ins_encode %{
12079 __ sall($dst$$Address, $shift$$constant);
12080 %}
12081 ins_pipe(ialu_mem_imm);
12082 %}
12083
12084 // Shift Left by variable
12085 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12086 %{
12087 predicate(!VM_Version::supports_bmi2());
12088 match(Set dst (LShiftI dst shift));
12089 effect(KILL cr);
12090
12091 format %{ "sall $dst, $shift" %}
12092 ins_encode %{
12093 __ sall($dst$$Register);
12094 %}
12095 ins_pipe(ialu_reg_reg);
12096 %}
12097
12098 // Shift Left by variable
12099 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12100 %{
12101 predicate(!VM_Version::supports_bmi2());
12102 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12103 effect(KILL cr);
12104
12105 format %{ "sall $dst, $shift" %}
12106 ins_encode %{
12107 __ sall($dst$$Address);
12108 %}
12109 ins_pipe(ialu_mem_reg);
12110 %}
12111
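// The BMI2 forms (shlx/shrx/sarx) accept the count in any register and do
// not write the flags, so they need neither an rcx-pinned operand nor a
// KILL cr effect.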
12112 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12113 %{
12114 predicate(VM_Version::supports_bmi2());
12115 match(Set dst (LShiftI src shift));
12116
12117 format %{ "shlxl $dst, $src, $shift" %}
12118 ins_encode %{
12119 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12120 %}
12121 ins_pipe(ialu_reg_reg);
12122 %}
12123
12124 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12125 %{
12126 predicate(VM_Version::supports_bmi2());
12127 match(Set dst (LShiftI (LoadI src) shift));
12128 ins_cost(175);
12129 format %{ "shlxl $dst, $src, $shift" %}
12130 ins_encode %{
12131 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12132 %}
12133 ins_pipe(ialu_reg_mem);
12134 %}
12135
12136 // Arithmetic Shift Right by 8-bit immediate
12137 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12138 %{
12139 predicate(!UseAPX);
12140 match(Set dst (RShiftI dst shift));
12141 effect(KILL cr);
12142
12143 format %{ "sarl $dst, $shift" %}
12144 ins_encode %{
12145 __ sarl($dst$$Register, $shift$$constant);
12146 %}
12147 ins_pipe(ialu_mem_imm);
12148 %}
12149
12150 // Arithmetic Shift Right by 8-bit immediate
12151 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12152 %{
12153 predicate(UseAPX);
12154 match(Set dst (RShiftI src shift));
12155 effect(KILL cr);
12156 flag(PD::Flag_ndd_demotable_opr1);
12157
12158 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12159 ins_encode %{
12160 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12161 %}
12162 ins_pipe(ialu_mem_imm);
12163 %}
12164
12165 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12166 %{
12167 predicate(UseAPX);
12168 match(Set dst (RShiftI (LoadI src) shift));
12169 effect(KILL cr);
12170
12171 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12172 ins_encode %{
12173 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12174 %}
12175 ins_pipe(ialu_mem_imm);
12176 %}
12177
12178 // Arithmetic Shift Right by 8-bit immediate
12179 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12180 %{
12181 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12182 effect(KILL cr);
12183
12184 format %{ "sarl $dst, $shift" %}
12185 ins_encode %{
12186 __ sarl($dst$$Address, $shift$$constant);
12187 %}
12188 ins_pipe(ialu_mem_imm);
12189 %}
12190
12191 // Arithmetic Shift Right by variable
12192 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12193 %{
12194 predicate(!VM_Version::supports_bmi2());
12195 match(Set dst (RShiftI dst shift));
12196 effect(KILL cr);
12197
12198 format %{ "sarl $dst, $shift" %}
12199 ins_encode %{
12200 __ sarl($dst$$Register);
12201 %}
12202 ins_pipe(ialu_reg_reg);
12203 %}
12204
12205 // Arithmetic Shift Right by variable
12206 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12207 %{
12208 predicate(!VM_Version::supports_bmi2());
12209 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12210 effect(KILL cr);
12211
12212 format %{ "sarl $dst, $shift" %}
12213 ins_encode %{
12214 __ sarl($dst$$Address);
12215 %}
12216 ins_pipe(ialu_mem_reg);
12217 %}
12218
12219 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12220 %{
12221 predicate(VM_Version::supports_bmi2());
12222 match(Set dst (RShiftI src shift));
12223
12224 format %{ "sarxl $dst, $src, $shift" %}
12225 ins_encode %{
12226 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12227 %}
12228 ins_pipe(ialu_reg_reg);
12229 %}
12230
12231 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12232 %{
12233 predicate(VM_Version::supports_bmi2());
12234 match(Set dst (RShiftI (LoadI src) shift));
12235 ins_cost(175);
12236 format %{ "sarxl $dst, $src, $shift" %}
12237 ins_encode %{
12238 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12239 %}
12240 ins_pipe(ialu_reg_mem);
12241 %}
12242
12243 // Logical Shift Right by 8-bit immediate
12244 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12245 %{
12246 predicate(!UseAPX);
12247 match(Set dst (URShiftI dst shift));
12248 effect(KILL cr);
12249
12250 format %{ "shrl $dst, $shift" %}
12251 ins_encode %{
12252 __ shrl($dst$$Register, $shift$$constant);
12253 %}
12254 ins_pipe(ialu_reg);
12255 %}
12256
12257 // Logical Shift Right by 8-bit immediate
12258 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12259 %{
12260 predicate(UseAPX);
12261 match(Set dst (URShiftI src shift));
12262 effect(KILL cr);
12263 flag(PD::Flag_ndd_demotable_opr1);
12264
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12266 ins_encode %{
12267 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12268 %}
12269 ins_pipe(ialu_reg);
12270 %}
12271
12272 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12273 %{
12274 predicate(UseAPX);
12275 match(Set dst (URShiftI (LoadI src) shift));
12276 effect(KILL cr);
12277
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12279 ins_encode %{
12280 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12281 %}
12282 ins_pipe(ialu_reg);
12283 %}
12284
12285 // Logical Shift Right by 8-bit immediate
12286 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12287 %{
12288 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12289 effect(KILL cr);
12290
12291 format %{ "shrl $dst, $shift" %}
12292 ins_encode %{
12293 __ shrl($dst$$Address, $shift$$constant);
12294 %}
12295 ins_pipe(ialu_mem_imm);
12296 %}
12297
12298 // Logical Shift Right by variable
12299 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12300 %{
12301 predicate(!VM_Version::supports_bmi2());
12302 match(Set dst (URShiftI dst shift));
12303 effect(KILL cr);
12304
12305 format %{ "shrl $dst, $shift" %}
12306 ins_encode %{
12307 __ shrl($dst$$Register);
12308 %}
12309 ins_pipe(ialu_reg_reg);
12310 %}
12311
12312 // Logical Shift Right by variable
12313 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12314 %{
12315 predicate(!VM_Version::supports_bmi2());
12316 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12317 effect(KILL cr);
12318
12319 format %{ "shrl $dst, $shift" %}
12320 ins_encode %{
12321 __ shrl($dst$$Address);
12322 %}
12323 ins_pipe(ialu_mem_reg);
12324 %}
12325
12326 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12327 %{
12328 predicate(VM_Version::supports_bmi2());
12329 match(Set dst (URShiftI src shift));
12330
12331 format %{ "shrxl $dst, $src, $shift" %}
12332 ins_encode %{
12333 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12334 %}
12335 ins_pipe(ialu_reg_reg);
12336 %}
12337
12338 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12339 %{
12340 predicate(VM_Version::supports_bmi2());
12341 match(Set dst (URShiftI (LoadI src) shift));
12342 ins_cost(175);
12343 format %{ "shrxl $dst, $src, $shift" %}
12344 ins_encode %{
12345 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12346 %}
12347 ins_pipe(ialu_reg_mem);
12348 %}
12349
12350 // Long Shift Instructions
12351 // Shift Left by one, two, three
12352 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12353 %{
12354 predicate(!UseAPX);
12355 match(Set dst (LShiftL dst shift));
12356 effect(KILL cr);
12357
12358 format %{ "salq $dst, $shift" %}
12359 ins_encode %{
12360 __ salq($dst$$Register, $shift$$constant);
12361 %}
12362 ins_pipe(ialu_reg);
12363 %}
12364
12365 // Shift Left by one, two, three
12366 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12367 %{
12368 predicate(UseAPX);
12369 match(Set dst (LShiftL src shift));
12370 effect(KILL cr);
12371 flag(PD::Flag_ndd_demotable_opr1);
12372
12373 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12374 ins_encode %{
12375 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12376 %}
12377 ins_pipe(ialu_reg);
12378 %}
12379
12380 // Shift Left by 8-bit immediate
12381 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12382 %{
12383 predicate(!UseAPX);
12384 match(Set dst (LShiftL dst shift));
12385 effect(KILL cr);
12386
12387 format %{ "salq $dst, $shift" %}
12388 ins_encode %{
12389 __ salq($dst$$Register, $shift$$constant);
12390 %}
12391 ins_pipe(ialu_reg);
12392 %}
12393
12394 // Shift Left by 8-bit immediate
12395 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12396 %{
12397 predicate(UseAPX);
12398 match(Set dst (LShiftL src shift));
12399 effect(KILL cr);
12400 flag(PD::Flag_ndd_demotable_opr1);
12401
12402 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12403 ins_encode %{
12404 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12405 %}
12406 ins_pipe(ialu_reg);
12407 %}
12408
12409 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12410 %{
12411 predicate(UseAPX);
12412 match(Set dst (LShiftL (LoadL src) shift));
12413 effect(KILL cr);
12414
12415 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12416 ins_encode %{
12417 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12418 %}
12419 ins_pipe(ialu_reg);
12420 %}
12421
12422 // Shift Left by 8-bit immediate
12423 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12424 %{
12425 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12426 effect(KILL cr);
12427
12428 format %{ "salq $dst, $shift" %}
12429 ins_encode %{
12430 __ salq($dst$$Address, $shift$$constant);
12431 %}
12432 ins_pipe(ialu_mem_imm);
12433 %}
12434
12435 // Shift Left by variable
12436 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12437 %{
12438 predicate(!VM_Version::supports_bmi2());
12439 match(Set dst (LShiftL dst shift));
12440 effect(KILL cr);
12441
12442 format %{ "salq $dst, $shift" %}
12443 ins_encode %{
12444 __ salq($dst$$Register);
12445 %}
12446 ins_pipe(ialu_reg_reg);
12447 %}
12448
12449 // Shift Left by variable
12450 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12451 %{
12452 predicate(!VM_Version::supports_bmi2());
12453 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12454 effect(KILL cr);
12455
12456 format %{ "salq $dst, $shift" %}
12457 ins_encode %{
12458 __ salq($dst$$Address);
12459 %}
12460 ins_pipe(ialu_mem_reg);
12461 %}
12462
12463 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12464 %{
12465 predicate(VM_Version::supports_bmi2());
12466 match(Set dst (LShiftL src shift));
12467
12468 format %{ "shlxq $dst, $src, $shift" %}
12469 ins_encode %{
12470 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12471 %}
12472 ins_pipe(ialu_reg_reg);
12473 %}
12474
12475 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12476 %{
12477 predicate(VM_Version::supports_bmi2());
12478 match(Set dst (LShiftL (LoadL src) shift));
12479 ins_cost(175);
12480 format %{ "shlxq $dst, $src, $shift" %}
12481 ins_encode %{
12482 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12483 %}
12484 ins_pipe(ialu_reg_mem);
12485 %}
12486
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12488 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12489 %{
12490 predicate(!UseAPX);
12491 match(Set dst (RShiftL dst shift));
12492 effect(KILL cr);
12493
12494 format %{ "sarq $dst, $shift" %}
12495 ins_encode %{
12496 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12497 %}
12498 ins_pipe(ialu_mem_imm);
12499 %}
12500
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12502 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12503 %{
12504 predicate(UseAPX);
12505 match(Set dst (RShiftL src shift));
12506 effect(KILL cr);
12507 flag(PD::Flag_ndd_demotable_opr1);
12508
12509 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12510 ins_encode %{
12511 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12512 %}
12513 ins_pipe(ialu_mem_imm);
12514 %}
12515
12516 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12517 %{
12518 predicate(UseAPX);
12519 match(Set dst (RShiftL (LoadL src) shift));
12520 effect(KILL cr);
12521
12522 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12523 ins_encode %{
12524 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12525 %}
12526 ins_pipe(ialu_mem_imm);
12527 %}
12528
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12530 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12531 %{
12532 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12533 effect(KILL cr);
12534
12535 format %{ "sarq $dst, $shift" %}
12536 ins_encode %{
12537 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12538 %}
12539 ins_pipe(ialu_mem_imm);
12540 %}
12541
12542 // Arithmetic Shift Right by variable
12543 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12544 %{
12545 predicate(!VM_Version::supports_bmi2());
12546 match(Set dst (RShiftL dst shift));
12547 effect(KILL cr);
12548
12549 format %{ "sarq $dst, $shift" %}
12550 ins_encode %{
12551 __ sarq($dst$$Register);
12552 %}
12553 ins_pipe(ialu_reg_reg);
12554 %}
12555
12556 // Arithmetic Shift Right by variable
12557 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12558 %{
12559 predicate(!VM_Version::supports_bmi2());
12560 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12561 effect(KILL cr);
12562
12563 format %{ "sarq $dst, $shift" %}
12564 ins_encode %{
12565 __ sarq($dst$$Address);
12566 %}
12567 ins_pipe(ialu_mem_reg);
12568 %}
12569
12570 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12571 %{
12572 predicate(VM_Version::supports_bmi2());
12573 match(Set dst (RShiftL src shift));
12574
12575 format %{ "sarxq $dst, $src, $shift" %}
12576 ins_encode %{
12577 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12578 %}
12579 ins_pipe(ialu_reg_reg);
12580 %}
12581
12582 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12583 %{
12584 predicate(VM_Version::supports_bmi2());
12585 match(Set dst (RShiftL (LoadL src) shift));
12586 ins_cost(175);
12587 format %{ "sarxq $dst, $src, $shift" %}
12588 ins_encode %{
12589 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12590 %}
12591 ins_pipe(ialu_reg_mem);
12592 %}
12593
12594 // Logical Shift Right by 8-bit immediate
12595 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12596 %{
12597 predicate(!UseAPX);
12598 match(Set dst (URShiftL dst shift));
12599 effect(KILL cr);
12600
12601 format %{ "shrq $dst, $shift" %}
12602 ins_encode %{
12603 __ shrq($dst$$Register, $shift$$constant);
12604 %}
12605 ins_pipe(ialu_reg);
12606 %}
12607
12608 // Logical Shift Right by 8-bit immediate
12609 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12610 %{
12611 predicate(UseAPX);
12612 match(Set dst (URShiftL src shift));
12613 effect(KILL cr);
12614 flag(PD::Flag_ndd_demotable_opr1);
12615
12616 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12617 ins_encode %{
12618 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12619 %}
12620 ins_pipe(ialu_reg);
12621 %}
12622
12623 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12624 %{
12625 predicate(UseAPX);
12626 match(Set dst (URShiftL (LoadL src) shift));
12627 effect(KILL cr);
12628
12629 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12630 ins_encode %{
12631 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12632 %}
12633 ins_pipe(ialu_reg);
12634 %}
12635
12636 // Logical Shift Right by 8-bit immediate
12637 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12638 %{
12639 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12640 effect(KILL cr);
12641
12642 format %{ "shrq $dst, $shift" %}
12643 ins_encode %{
12644 __ shrq($dst$$Address, $shift$$constant);
12645 %}
12646 ins_pipe(ialu_mem_imm);
12647 %}
12648
12649 // Logical Shift Right by variable
12650 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12651 %{
12652 predicate(!VM_Version::supports_bmi2());
12653 match(Set dst (URShiftL dst shift));
12654 effect(KILL cr);
12655
12656 format %{ "shrq $dst, $shift" %}
12657 ins_encode %{
12658 __ shrq($dst$$Register);
12659 %}
12660 ins_pipe(ialu_reg_reg);
12661 %}
12662
12663 // Logical Shift Right by variable
12664 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12665 %{
12666 predicate(!VM_Version::supports_bmi2());
12667 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12668 effect(KILL cr);
12669
12670 format %{ "shrq $dst, $shift" %}
12671 ins_encode %{
12672 __ shrq($dst$$Address);
12673 %}
12674 ins_pipe(ialu_mem_reg);
12675 %}
12676
12677 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12678 %{
12679 predicate(VM_Version::supports_bmi2());
12680 match(Set dst (URShiftL src shift));
12681
12682 format %{ "shrxq $dst, $src, $shift" %}
12683 ins_encode %{
12684 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12685 %}
12686 ins_pipe(ialu_reg_reg);
12687 %}
12688
12689 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12690 %{
12691 predicate(VM_Version::supports_bmi2());
12692 match(Set dst (URShiftL (LoadL src) shift));
12693 ins_cost(175);
12694 format %{ "shrxq $dst, $src, $shift" %}
12695 ins_encode %{
12696 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12697 %}
12698 ins_pipe(ialu_reg_mem);
12699 %}
12700
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
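// For illustration: this matches the shape C2 produces for
//   byte b = (byte) i;   // canonicalized as (i << 24) >> 24
// and collapses the two shifts into one sign-extending movsbl.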
12703 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12704 %{
12705 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12706
12707 format %{ "movsbl $dst, $src\t# i2b" %}
12708 ins_encode %{
12709 __ movsbl($dst$$Register, $src$$Register);
12710 %}
12711 ins_pipe(ialu_reg_reg);
12712 %}
12713
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12716 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12717 %{
12718 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12719
12720 format %{ "movswl $dst, $src\t# i2s" %}
12721 ins_encode %{
12722 __ movswl($dst$$Register, $src$$Register);
12723 %}
12724 ins_pipe(ialu_reg_reg);
12725 %}
12726
12727 // ROL/ROR instructions
12728
// Rotate Left by constant.
12730 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12731 %{
12732 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12733 match(Set dst (RotateLeft dst shift));
12734 effect(KILL cr);
12735 format %{ "roll $dst, $shift" %}
12736 ins_encode %{
12737 __ roll($dst$$Register, $shift$$constant);
12738 %}
12739 ins_pipe(ialu_reg);
12740 %}
12741
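// BMI2 provides rorx (rotate right, flags untouched) but no rolx, so a
// rotate left by n is emitted as a rotate right by (32 - n) & 31; for
// example rol(x, 5) == ror(x, 27). No flags are written, hence no KILL cr.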
12742 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12743 %{
12744 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12745 match(Set dst (RotateLeft src shift));
12746 format %{ "rolxl $dst, $src, $shift" %}
12747 ins_encode %{
12748 int shift = 32 - ($shift$$constant & 31);
12749 __ rorxl($dst$$Register, $src$$Register, shift);
12750 %}
12751 ins_pipe(ialu_reg_reg);
12752 %}
12753
12754 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12755 %{
12756 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12757 match(Set dst (RotateLeft (LoadI src) shift));
12758 ins_cost(175);
12759 format %{ "rolxl $dst, $src, $shift" %}
12760 ins_encode %{
12761 int shift = 32 - ($shift$$constant & 31);
12762 __ rorxl($dst$$Register, $src$$Address, shift);
12763 %}
12764 ins_pipe(ialu_reg_mem);
12765 %}
12766
12767 // Rotate Left by variable
12768 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12769 %{
12770 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12771 match(Set dst (RotateLeft dst shift));
12772 effect(KILL cr);
12773 format %{ "roll $dst, $shift" %}
12774 ins_encode %{
12775 __ roll($dst$$Register);
12776 %}
12777 ins_pipe(ialu_reg_reg);
12778 %}
12779
12780 // Rotate Left by variable
12781 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12782 %{
12783 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12784 match(Set dst (RotateLeft src shift));
12785 effect(KILL cr);
12786 flag(PD::Flag_ndd_demotable_opr1);
12787
12788 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12789 ins_encode %{
12790 __ eroll($dst$$Register, $src$$Register, false);
12791 %}
12792 ins_pipe(ialu_reg_reg);
12793 %}
12794
12795 // Rotate Right by constant.
12796 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12797 %{
12798 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12799 match(Set dst (RotateRight dst shift));
12800 effect(KILL cr);
12801 format %{ "rorl $dst, $shift" %}
12802 ins_encode %{
12803 __ rorl($dst$$Register, $shift$$constant);
12804 %}
12805 ins_pipe(ialu_reg);
12806 %}
12807
12808 // Rotate Right by constant.
12809 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12810 %{
12811 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12812 match(Set dst (RotateRight src shift));
12813 format %{ "rorxl $dst, $src, $shift" %}
12814 ins_encode %{
12815 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12816 %}
12817 ins_pipe(ialu_reg_reg);
12818 %}
12819
12820 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12821 %{
12822 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12823 match(Set dst (RotateRight (LoadI src) shift));
12824 ins_cost(175);
12825 format %{ "rorxl $dst, $src, $shift" %}
12826 ins_encode %{
12827 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12828 %}
12829 ins_pipe(ialu_reg_mem);
12830 %}
12831
12832 // Rotate Right by variable
12833 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12834 %{
12835 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12836 match(Set dst (RotateRight dst shift));
12837 effect(KILL cr);
12838 format %{ "rorl $dst, $shift" %}
12839 ins_encode %{
12840 __ rorl($dst$$Register);
12841 %}
12842 ins_pipe(ialu_reg_reg);
12843 %}
12844
12845 // Rotate Right by variable
12846 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12847 %{
12848 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12849 match(Set dst (RotateRight src shift));
12850 effect(KILL cr);
12851 flag(PD::Flag_ndd_demotable_opr1);
12852
  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12854 ins_encode %{
12855 __ erorl($dst$$Register, $src$$Register, false);
12856 %}
12857 ins_pipe(ialu_reg_reg);
12858 %}
12859
12860 // Rotate Left by constant.
12861 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12862 %{
12863 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12864 match(Set dst (RotateLeft dst shift));
12865 effect(KILL cr);
12866 format %{ "rolq $dst, $shift" %}
12867 ins_encode %{
12868 __ rolq($dst$$Register, $shift$$constant);
12869 %}
12870 ins_pipe(ialu_reg);
12871 %}
12872
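// As in the int case, rotate left by n is emitted as rorxq by (64 - n) & 63.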
12873 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12874 %{
12875 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12876 match(Set dst (RotateLeft src shift));
12877 format %{ "rolxq $dst, $src, $shift" %}
12878 ins_encode %{
12879 int shift = 64 - ($shift$$constant & 63);
12880 __ rorxq($dst$$Register, $src$$Register, shift);
12881 %}
12882 ins_pipe(ialu_reg_reg);
12883 %}
12884
12885 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12886 %{
12887 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12888 match(Set dst (RotateLeft (LoadL src) shift));
12889 ins_cost(175);
12890 format %{ "rolxq $dst, $src, $shift" %}
12891 ins_encode %{
12892 int shift = 64 - ($shift$$constant & 63);
12893 __ rorxq($dst$$Register, $src$$Address, shift);
12894 %}
12895 ins_pipe(ialu_reg_mem);
12896 %}
12897
12898 // Rotate Left by variable
12899 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12900 %{
12901 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12902 match(Set dst (RotateLeft dst shift));
12903 effect(KILL cr);
12904
12905 format %{ "rolq $dst, $shift" %}
12906 ins_encode %{
12907 __ rolq($dst$$Register);
12908 %}
12909 ins_pipe(ialu_reg_reg);
12910 %}
12911
12912 // Rotate Left by variable
12913 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12914 %{
12915 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12916 match(Set dst (RotateLeft src shift));
12917 effect(KILL cr);
12918 flag(PD::Flag_ndd_demotable_opr1);
12919
  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12921 ins_encode %{
12922 __ erolq($dst$$Register, $src$$Register, false);
12923 %}
12924 ins_pipe(ialu_reg_reg);
12925 %}
12926
12927 // Rotate Right by constant.
12928 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12929 %{
12930 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12931 match(Set dst (RotateRight dst shift));
12932 effect(KILL cr);
12933 format %{ "rorq $dst, $shift" %}
12934 ins_encode %{
12935 __ rorq($dst$$Register, $shift$$constant);
12936 %}
12937 ins_pipe(ialu_reg);
12938 %}
12939
12940 // Rotate Right by constant
12941 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12942 %{
12943 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12944 match(Set dst (RotateRight src shift));
12945 format %{ "rorxq $dst, $src, $shift" %}
12946 ins_encode %{
12947 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12948 %}
12949 ins_pipe(ialu_reg_reg);
12950 %}
12951
12952 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12953 %{
12954 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12955 match(Set dst (RotateRight (LoadL src) shift));
12956 ins_cost(175);
12957 format %{ "rorxq $dst, $src, $shift" %}
12958 ins_encode %{
12959 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12960 %}
12961 ins_pipe(ialu_reg_mem);
12962 %}
12963
12964 // Rotate Right by variable
12965 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12966 %{
12967 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12968 match(Set dst (RotateRight dst shift));
12969 effect(KILL cr);
12970 format %{ "rorq $dst, $shift" %}
12971 ins_encode %{
12972 __ rorq($dst$$Register);
12973 %}
12974 ins_pipe(ialu_reg_reg);
12975 %}
12976
12977 // Rotate Right by variable
12978 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12979 %{
12980 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12981 match(Set dst (RotateRight src shift));
12982 effect(KILL cr);
12983 flag(PD::Flag_ndd_demotable_opr1);
12984
  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12986 ins_encode %{
12987 __ erorq($dst$$Register, $src$$Register, false);
12988 %}
12989 ins_pipe(ialu_reg_reg);
12990 %}
12991
12992 //----------------------------- CompressBits/ExpandBits ------------------------
12993
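// For illustration: pext gathers the src bits selected by mask into the
// low-order bits of dst, and pdep scatters them back out. With mask 0b1010,
// pext packs bits 1 and 3 of src into bits 0 and 1 of dst; pdep performs
// the inverse deposit.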
12994 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12995 predicate(n->bottom_type()->isa_long());
12996 match(Set dst (CompressBits src mask));
12997 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12998 ins_encode %{
12999 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
13000 %}
13001 ins_pipe( pipe_slow );
13002 %}
13003
13004 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
13005 predicate(n->bottom_type()->isa_long());
13006 match(Set dst (ExpandBits src mask));
13007 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
13008 ins_encode %{
13009 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
13010 %}
13011 ins_pipe( pipe_slow );
13012 %}
13013
13014 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13015 predicate(n->bottom_type()->isa_long());
13016 match(Set dst (CompressBits src (LoadL mask)));
13017 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
13018 ins_encode %{
13019 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
13020 %}
13021 ins_pipe( pipe_slow );
13022 %}
13023
13024 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
13025 predicate(n->bottom_type()->isa_long());
13026 match(Set dst (ExpandBits src (LoadL mask)));
13027 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
13028 ins_encode %{
13029 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
13030 %}
13031 ins_pipe( pipe_slow );
13032 %}
13033
13034
13035 // Logical Instructions
13036
13037 // Integer Logical Instructions
13038
13039 // And Instructions
13040 // And Register with Register
13041 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13042 %{
13043 predicate(!UseAPX);
13044 match(Set dst (AndI dst src));
13045 effect(KILL cr);
13046 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13047
13048 format %{ "andl $dst, $src\t# int" %}
13049 ins_encode %{
13050 __ andl($dst$$Register, $src$$Register);
13051 %}
13052 ins_pipe(ialu_reg_reg);
13053 %}
13054
13055 // And Register with Register using New Data Destination (NDD)
13056 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13057 %{
13058 predicate(UseAPX);
13059 match(Set dst (AndI src1 src2));
13060 effect(KILL cr);
13061 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13062
13063 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13064 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13068 ins_pipe(ialu_reg_reg);
13069 %}
13070
13071 // And Register with Immediate 255
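// (x & 0xFF) is a zero extension, so it lowers to movzbl; movzx does not
// write the flags, which is why these rules carry no KILL cr effect.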
13072 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
13073 %{
13074 match(Set dst (AndI src mask));
13075
13076 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
13077 ins_encode %{
13078 __ movzbl($dst$$Register, $src$$Register);
13079 %}
13080 ins_pipe(ialu_reg);
13081 %}
13082
13083 // And Register with Immediate 255 and promote to long
13084 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13085 %{
13086 match(Set dst (ConvI2L (AndI src mask)));
13087
13088 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13089 ins_encode %{
13090 __ movzbl($dst$$Register, $src$$Register);
13091 %}
13092 ins_pipe(ialu_reg);
13093 %}
13094
13095 // And Register with Immediate 65535
13096 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13097 %{
13098 match(Set dst (AndI src mask));
13099
13100 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13101 ins_encode %{
13102 __ movzwl($dst$$Register, $src$$Register);
13103 %}
13104 ins_pipe(ialu_reg);
13105 %}
13106
13107 // And Register with Immediate 65535 and promote to long
13108 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13109 %{
13110 match(Set dst (ConvI2L (AndI src mask)));
13111
13112 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13113 ins_encode %{
13114 __ movzwl($dst$$Register, $src$$Register);
13115 %}
13116 ins_pipe(ialu_reg);
13117 %}
13118
// The int-to-long conversion can be elided after an AND with a small
// (2^n - 1) bitmask, since the masked result is already zero-extended.
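// For example, with mask == 0x3FF (2^10 - 1), exact_log2(mask + 1) == 10 is
// loaded into tmp, and bzhiq copies src to dst while zeroing all bits from
// index 10 upward, applying the mask and zero-extending in one step.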
13120 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13121 %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (ConvI2L (AndI src mask)));
  effect(TEMP tmp, KILL cr);
  ins_cost(125);
  format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13127 ins_encode %{
13128 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13129 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13130 %}
13131 ins_pipe(ialu_reg_reg);
13132 %}
13133
13134 // And Register with Immediate
13135 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13136 %{
13137 predicate(!UseAPX);
13138 match(Set dst (AndI dst src));
13139 effect(KILL cr);
13140 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13141
13142 format %{ "andl $dst, $src\t# int" %}
13143 ins_encode %{
13144 __ andl($dst$$Register, $src$$constant);
13145 %}
13146 ins_pipe(ialu_reg);
13147 %}
13148
13149 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13150 %{
13151 predicate(UseAPX);
13152 match(Set dst (AndI src1 src2));
13153 effect(KILL cr);
13154 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13155
13156 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13157 ins_encode %{
13158 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13159 %}
13160 ins_pipe(ialu_reg);
13161 %}
13162
13163 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13164 %{
13165 predicate(UseAPX);
13166 match(Set dst (AndI (LoadI src1) src2));
13167 effect(KILL cr);
13168 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13169
13170 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13171 ins_encode %{
13172 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13173 %}
13174 ins_pipe(ialu_reg);
13175 %}
13176
13177 // And Register with Memory
13178 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13179 %{
13180 predicate(!UseAPX);
13181 match(Set dst (AndI dst (LoadI src)));
13182 effect(KILL cr);
13183 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13184
13185 ins_cost(150);
13186 format %{ "andl $dst, $src\t# int" %}
13187 ins_encode %{
13188 __ andl($dst$$Register, $src$$Address);
13189 %}
13190 ins_pipe(ialu_reg_mem);
13191 %}
13192
13193 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13194 %{
13195 predicate(UseAPX);
13196 match(Set dst (AndI src1 (LoadI src2)));
13197 effect(KILL cr);
13198 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13199
13200 ins_cost(150);
13201 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13202 ins_encode %{
13203 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13204 %}
13205 ins_pipe(ialu_reg_mem);
13206 %}
13207
13208 // And Memory with Register
13209 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13210 %{
13211 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13212 effect(KILL cr);
13213 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13214
13215 ins_cost(150);
13216 format %{ "andb $dst, $src\t# byte" %}
13217 ins_encode %{
13218 __ andb($dst$$Address, $src$$Register);
13219 %}
13220 ins_pipe(ialu_mem_reg);
13221 %}
13222
13223 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13224 %{
13225 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13226 effect(KILL cr);
13227 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13228
13229 ins_cost(150);
13230 format %{ "andl $dst, $src\t# int" %}
13231 ins_encode %{
13232 __ andl($dst$$Address, $src$$Register);
13233 %}
13234 ins_pipe(ialu_mem_reg);
13235 %}
13236
13237 // And Memory with Immediate
13238 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13239 %{
13240 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13241 effect(KILL cr);
13242 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13243
13244 ins_cost(125);
13245 format %{ "andl $dst, $src\t# int" %}
13246 ins_encode %{
13247 __ andl($dst$$Address, $src$$constant);
13248 %}
13249 ins_pipe(ialu_mem_imm);
13250 %}
13251
13252 // BMI1 instructions
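// These match the canonical bit-trick shapes:
//   andn(a, b) == ~a & b
//   blsi(x)    == -x & x        (isolate lowest set bit)
//   blsmsk(x)  == (x - 1) ^ x   (mask up to and including lowest set bit)
//   blsr(x)    == (x - 1) & x   (clear lowest set bit)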
13253 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13254 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13255 predicate(UseBMI1Instructions);
13256 effect(KILL cr);
13257 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13258
13259 ins_cost(125);
13260 format %{ "andnl $dst, $src1, $src2" %}
13261
13262 ins_encode %{
13263 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13264 %}
13265 ins_pipe(ialu_reg_mem);
13266 %}
13267
13268 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13269 match(Set dst (AndI (XorI src1 minus_1) src2));
13270 predicate(UseBMI1Instructions);
13271 effect(KILL cr);
13272 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13273
13274 format %{ "andnl $dst, $src1, $src2" %}
13275
13276 ins_encode %{
13277 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13278 %}
13279 ins_pipe(ialu_reg);
13280 %}
13281
13282 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13283 match(Set dst (AndI (SubI imm_zero src) src));
13284 predicate(UseBMI1Instructions);
13285 effect(KILL cr);
13286 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13287
13288 format %{ "blsil $dst, $src" %}
13289
13290 ins_encode %{
13291 __ blsil($dst$$Register, $src$$Register);
13292 %}
13293 ins_pipe(ialu_reg);
13294 %}
13295
13296 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13297 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13298 predicate(UseBMI1Instructions);
13299 effect(KILL cr);
13300 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13301
13302 ins_cost(125);
13303 format %{ "blsil $dst, $src" %}
13304
13305 ins_encode %{
13306 __ blsil($dst$$Register, $src$$Address);
13307 %}
13308 ins_pipe(ialu_reg_mem);
13309 %}
13310
13311 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13312 %{
13313 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13314 predicate(UseBMI1Instructions);
13315 effect(KILL cr);
13316 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13317
13318 ins_cost(125);
13319 format %{ "blsmskl $dst, $src" %}
13320
13321 ins_encode %{
13322 __ blsmskl($dst$$Register, $src$$Address);
13323 %}
13324 ins_pipe(ialu_reg_mem);
13325 %}
13326
13327 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13328 %{
13329 match(Set dst (XorI (AddI src minus_1) src));
13330 predicate(UseBMI1Instructions);
13331 effect(KILL cr);
13332 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13333
13334 format %{ "blsmskl $dst, $src" %}
13335
13336 ins_encode %{
13337 __ blsmskl($dst$$Register, $src$$Register);
13338 %}
13339
13340 ins_pipe(ialu_reg);
13341 %}
13342
13343 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13344 %{
13345 match(Set dst (AndI (AddI src minus_1) src) );
13346 predicate(UseBMI1Instructions);
13347 effect(KILL cr);
13348 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13349
13350 format %{ "blsrl $dst, $src" %}
13351
13352 ins_encode %{
13353 __ blsrl($dst$$Register, $src$$Register);
13354 %}
13355
  ins_pipe(ialu_reg);
13357 %}
13358
13359 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13360 %{
13361 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13362 predicate(UseBMI1Instructions);
13363 effect(KILL cr);
13364 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13365
13366 ins_cost(125);
13367 format %{ "blsrl $dst, $src" %}
13368
13369 ins_encode %{
13370 __ blsrl($dst$$Register, $src$$Address);
13371 %}
13372
  ins_pipe(ialu_reg_mem);
13374 %}
13375
13376 // Or Instructions
13377 // Or Register with Register
13378 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13379 %{
13380 predicate(!UseAPX);
13381 match(Set dst (OrI dst src));
13382 effect(KILL cr);
13383 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384
13385 format %{ "orl $dst, $src\t# int" %}
13386 ins_encode %{
13387 __ orl($dst$$Register, $src$$Register);
13388 %}
13389 ins_pipe(ialu_reg_reg);
13390 %}
13391
13392 // Or Register with Register using New Data Destination (NDD)
13393 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13394 %{
13395 predicate(UseAPX);
13396 match(Set dst (OrI src1 src2));
13397 effect(KILL cr);
13398 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13399
13400 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13401 ins_encode %{
13402 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13403 %}
13404 ins_pipe(ialu_reg_reg);
13405 %}
13406
13407 // Or Register with Immediate
13408 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13409 %{
13410 predicate(!UseAPX);
13411 match(Set dst (OrI dst src));
13412 effect(KILL cr);
13413 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414
13415 format %{ "orl $dst, $src\t# int" %}
13416 ins_encode %{
13417 __ orl($dst$$Register, $src$$constant);
13418 %}
13419 ins_pipe(ialu_reg);
13420 %}
13421
13422 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13423 %{
13424 predicate(UseAPX);
13425 match(Set dst (OrI src1 src2));
13426 effect(KILL cr);
13427 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13428
13429 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13430 ins_encode %{
13431 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13432 %}
13433 ins_pipe(ialu_reg);
13434 %}
13435
13436 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13437 %{
13438 predicate(UseAPX);
13439 match(Set dst (OrI src1 src2));
13440 effect(KILL cr);
13441 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13442
13443 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13444 ins_encode %{
13445 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13446 %}
13447 ins_pipe(ialu_reg);
13448 %}
13449
13450 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13451 %{
13452 predicate(UseAPX);
13453 match(Set dst (OrI (LoadI src1) src2));
13454 effect(KILL cr);
13455 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13456
13457 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13458 ins_encode %{
13459 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13460 %}
13461 ins_pipe(ialu_reg);
13462 %}
13463
13464 // Or Register with Memory
13465 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13466 %{
13467 predicate(!UseAPX);
13468 match(Set dst (OrI dst (LoadI src)));
13469 effect(KILL cr);
13470 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13471
13472 ins_cost(150);
13473 format %{ "orl $dst, $src\t# int" %}
13474 ins_encode %{
13475 __ orl($dst$$Register, $src$$Address);
13476 %}
13477 ins_pipe(ialu_reg_mem);
13478 %}
13479
13480 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13481 %{
13482 predicate(UseAPX);
13483 match(Set dst (OrI src1 (LoadI src2)));
13484 effect(KILL cr);
13485 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13486
13487 ins_cost(150);
13488 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13489 ins_encode %{
13490 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13491 %}
13492 ins_pipe(ialu_reg_mem);
13493 %}
13494
13495 // Or Memory with Register
13496 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13497 %{
13498 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13499 effect(KILL cr);
13500 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13501
13502 ins_cost(150);
13503 format %{ "orb $dst, $src\t# byte" %}
13504 ins_encode %{
13505 __ orb($dst$$Address, $src$$Register);
13506 %}
13507 ins_pipe(ialu_mem_reg);
13508 %}
13509
13510 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13511 %{
13512 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13513 effect(KILL cr);
13514 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515
13516 ins_cost(150);
13517 format %{ "orl $dst, $src\t# int" %}
13518 ins_encode %{
13519 __ orl($dst$$Address, $src$$Register);
13520 %}
13521 ins_pipe(ialu_mem_reg);
13522 %}
13523
13524 // Or Memory with Immediate
13525 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13526 %{
13527 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13528 effect(KILL cr);
13529 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13530
13531 ins_cost(125);
13532 format %{ "orl $dst, $src\t# int" %}
13533 ins_encode %{
13534 __ orl($dst$$Address, $src$$constant);
13535 %}
13536 ins_pipe(ialu_mem_imm);
13537 %}
13538
13539 // Xor Instructions
13540 // Xor Register with Register
13541 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13542 %{
13543 predicate(!UseAPX);
13544 match(Set dst (XorI dst src));
13545 effect(KILL cr);
13546 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13547
13548 format %{ "xorl $dst, $src\t# int" %}
13549 ins_encode %{
13550 __ xorl($dst$$Register, $src$$Register);
13551 %}
13552 ins_pipe(ialu_reg_reg);
13553 %}
13554
13555 // Xor Register with Register using New Data Destination (NDD)
13556 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13557 %{
13558 predicate(UseAPX);
13559 match(Set dst (XorI src1 src2));
13560 effect(KILL cr);
13561 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13562
13563 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13564 ins_encode %{
13565 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13566 %}
13567 ins_pipe(ialu_reg_reg);
13568 %}
13569
13570 // Xor Register with Immediate -1
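// (x ^ -1) is bitwise NOT. notl does not write the flags, so unlike the
// general xor rules these carry no KILL cr effect.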
13571 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13572 %{
13573 predicate(!UseAPX);
13574 match(Set dst (XorI dst imm));
13575
13576 format %{ "notl $dst" %}
13577 ins_encode %{
13578 __ notl($dst$$Register);
13579 %}
13580 ins_pipe(ialu_reg);
13581 %}
13582
13583 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13584 %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
13587 flag(PD::Flag_ndd_demotable_opr1);
13588
13589 format %{ "enotl $dst, $src" %}
13590 ins_encode %{
13591 __ enotl($dst$$Register, $src$$Register);
13592 %}
13593 ins_pipe(ialu_reg);
13594 %}
13595
13596 // Xor Register with Immediate
13597 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13598 %{
  // Exclude src == -1 here so that xorI_rReg_im1 is always selected for that constant, regardless of cost.
13600 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13601 match(Set dst (XorI dst src));
13602 effect(KILL cr);
13603 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13604
13605 format %{ "xorl $dst, $src\t# int" %}
13606 ins_encode %{
13607 __ xorl($dst$$Register, $src$$constant);
13608 %}
13609 ins_pipe(ialu_reg);
13610 %}
13611
13612 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13613 %{
  // Exclude src2 == -1 here so that xorI_rReg_im1_ndd is always selected for that constant, regardless of cost.
13615 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13616 match(Set dst (XorI src1 src2));
13617 effect(KILL cr);
13618 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13619
13620 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13621 ins_encode %{
13622 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13623 %}
13624 ins_pipe(ialu_reg);
13625 %}
13626
// Xor Memory with Immediate into Register
13628 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13629 %{
13630 predicate(UseAPX);
13631 match(Set dst (XorI (LoadI src1) src2));
13632 effect(KILL cr);
13633 ins_cost(150);
13634 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13635
13636 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13637 ins_encode %{
13638 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13639 %}
13640 ins_pipe(ialu_reg);
13641 %}
13642
13643 // Xor Register with Memory
13644 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13645 %{
13646 predicate(!UseAPX);
13647 match(Set dst (XorI dst (LoadI src)));
13648 effect(KILL cr);
13649 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13650
13651 ins_cost(150);
13652 format %{ "xorl $dst, $src\t# int" %}
13653 ins_encode %{
13654 __ xorl($dst$$Register, $src$$Address);
13655 %}
13656 ins_pipe(ialu_reg_mem);
13657 %}
13658
13659 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13660 %{
13661 predicate(UseAPX);
13662 match(Set dst (XorI src1 (LoadI src2)));
13663 effect(KILL cr);
13664 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13665
13666 ins_cost(150);
13667 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13668 ins_encode %{
13669 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13670 %}
13671 ins_pipe(ialu_reg_mem);
13672 %}
13673
13674 // Xor Memory with Register
13675 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13676 %{
13677 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13678 effect(KILL cr);
13679 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13680
13681 ins_cost(150);
13682 format %{ "xorb $dst, $src\t# byte" %}
13683 ins_encode %{
13684 __ xorb($dst$$Address, $src$$Register);
13685 %}
13686 ins_pipe(ialu_mem_reg);
13687 %}
13688
13689 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13690 %{
13691 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13692 effect(KILL cr);
13693 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13694
13695 ins_cost(150);
13696 format %{ "xorl $dst, $src\t# int" %}
13697 ins_encode %{
13698 __ xorl($dst$$Address, $src$$Register);
13699 %}
13700 ins_pipe(ialu_mem_reg);
13701 %}
13702
13703 // Xor Memory with Immediate
13704 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13705 %{
13706 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13707 effect(KILL cr);
13708 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13709
13710 ins_cost(125);
13711 format %{ "xorl $dst, $src\t# int" %}
13712 ins_encode %{
13713 __ xorl($dst$$Address, $src$$constant);
13714 %}
13715 ins_pipe(ialu_mem_imm);
13716 %}
13717
13718
13719 // Long Logical Instructions
13720
13721 // And Instructions
13722 // And Register with Register
13723 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13724 %{
13725 predicate(!UseAPX);
13726 match(Set dst (AndL dst src));
13727 effect(KILL cr);
13728 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13729
13730 format %{ "andq $dst, $src\t# long" %}
13731 ins_encode %{
13732 __ andq($dst$$Register, $src$$Register);
13733 %}
13734 ins_pipe(ialu_reg_reg);
13735 %}
13736
13737 // And Register with Register using New Data Destination (NDD)
13738 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13739 %{
13740 predicate(UseAPX);
13741 match(Set dst (AndL src1 src2));
13742 effect(KILL cr);
13743 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13744
13745 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13746 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13750 ins_pipe(ialu_reg_reg);
13751 %}
13752
13753 // And Register with Immediate 255
13754 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13755 %{
13756 match(Set dst (AndL src mask));
13757
13758 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13759 ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13761 __ movzbl($dst$$Register, $src$$Register);
13762 %}
13763 ins_pipe(ialu_reg);
13764 %}
13765
13766 // And Register with Immediate 65535
13767 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13768 %{
13769 match(Set dst (AndL src mask));
13770
13771 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13772 ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13774 __ movzwl($dst$$Register, $src$$Register);
13775 %}
13776 ins_pipe(ialu_reg);
13777 %}
13778
13779 // And Register with Immediate
13780 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13781 %{
13782 predicate(!UseAPX);
13783 match(Set dst (AndL dst src));
13784 effect(KILL cr);
13785 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786
13787 format %{ "andq $dst, $src\t# long" %}
13788 ins_encode %{
13789 __ andq($dst$$Register, $src$$constant);
13790 %}
13791 ins_pipe(ialu_reg);
13792 %}
13793
13794 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13795 %{
13796 predicate(UseAPX);
13797 match(Set dst (AndL src1 src2));
13798 effect(KILL cr);
13799 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13800
13801 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13802 ins_encode %{
13803 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13804 %}
13805 ins_pipe(ialu_reg);
13806 %}
13807
13808 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13809 %{
13810 predicate(UseAPX);
13811 match(Set dst (AndL (LoadL src1) src2));
13812 effect(KILL cr);
13813 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13814
13815 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13816 ins_encode %{
13817 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13818 %}
13819 ins_pipe(ialu_reg);
13820 %}
13821
13822 // And Register with Memory
13823 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13824 %{
13825 predicate(!UseAPX);
13826 match(Set dst (AndL dst (LoadL src)));
13827 effect(KILL cr);
13828 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13829
13830 ins_cost(150);
13831 format %{ "andq $dst, $src\t# long" %}
13832 ins_encode %{
13833 __ andq($dst$$Register, $src$$Address);
13834 %}
13835 ins_pipe(ialu_reg_mem);
13836 %}
13837
13838 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13839 %{
13840 predicate(UseAPX);
13841 match(Set dst (AndL src1 (LoadL src2)));
13842 effect(KILL cr);
13843 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13844
13845 ins_cost(150);
13846 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13847 ins_encode %{
13848 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13849 %}
13850 ins_pipe(ialu_reg_mem);
13851 %}
13852
13853 // And Memory with Register
13854 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13855 %{
13856 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13857 effect(KILL cr);
13858 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13859
13860 ins_cost(150);
13861 format %{ "andq $dst, $src\t# long" %}
13862 ins_encode %{
13863 __ andq($dst$$Address, $src$$Register);
13864 %}
13865 ins_pipe(ialu_mem_reg);
13866 %}
13867
13868 // And Memory with Immediate
13869 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13870 %{
13871 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13872 effect(KILL cr);
13873 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13874
13875 ins_cost(125);
13876 format %{ "andq $dst, $src\t# long" %}
13877 ins_encode %{
13878 __ andq($dst$$Address, $src$$constant);
13879 %}
13880 ins_pipe(ialu_mem_imm);
13881 %}
13882
13883 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13884 %{
13885 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13886 // because AND/OR works well enough for 8/32-bit values.
13887 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13888
13889 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13890 effect(KILL cr);
13891
13892 ins_cost(125);
13893 format %{ "btrq $dst, log2(not($con))\t# long" %}
13894 ins_encode %{
13895 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13896 %}
13897 ins_pipe(ialu_mem_imm);
13898 %}
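
// Worked example (illustrative, not from the source): for con = ~(1L << 40),
// ~con == 1L << 40, so log2i_graceful(~con) == 40 > 30 and this rule emits
//   btrq [dst], 40   # clear bit 40 in place
// avoiding a register temporary for a mask that does not fit in an imm32.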
13899
13900 // BMI1 instructions
13901 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13902 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13903 predicate(UseBMI1Instructions);
13904 effect(KILL cr);
13905 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13906
13907 ins_cost(125);
13908 format %{ "andnq $dst, $src1, $src2" %}
13909
13910 ins_encode %{
13911 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13912 %}
13913 ins_pipe(ialu_reg_mem);
13914 %}
13915
13916 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13917 match(Set dst (AndL (XorL src1 minus_1) src2));
13918 predicate(UseBMI1Instructions);
13919 effect(KILL cr);
13920 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13921
13922 format %{ "andnq $dst, $src1, $src2" %}
13923
13924 ins_encode %{
13925 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13926 %}
13927 ins_pipe(ialu_reg_mem);
13928 %}
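
// Identity behind the two andn rules (illustration): BMI1 ANDN computes
// dst = ~src1 & src2, and the matched tree (AndL (XorL src1 -1) src2) is
// exactly that, since x ^ -1 == ~x. E.g. ~0x0F & 0xFF == 0xF0.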
13929
13930 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13931 match(Set dst (AndL (SubL imm_zero src) src));
13932 predicate(UseBMI1Instructions);
13933 effect(KILL cr);
13934 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13935
13936 format %{ "blsiq $dst, $src" %}
13937
13938 ins_encode %{
13939 __ blsiq($dst$$Register, $src$$Register);
13940 %}
13941 ins_pipe(ialu_reg);
13942 %}
13943
13944 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13945 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13946 predicate(UseBMI1Instructions);
13947 effect(KILL cr);
13948 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13949
13950 ins_cost(125);
13951 format %{ "blsiq $dst, $src" %}
13952
13953 ins_encode %{
13954 __ blsiq($dst$$Register, $src$$Address);
13955 %}
13956 ins_pipe(ialu_reg_mem);
13957 %}
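
// Identity behind blsi (illustration): BLSI computes dst = src & -src, which
// isolates the lowest set bit; e.g. src = 0b01101000 gives
// src & -src == 0b00001000.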
13958
13959 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13960 %{
13961 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13962 predicate(UseBMI1Instructions);
13963 effect(KILL cr);
13964 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13965
13966 ins_cost(125);
13967 format %{ "blsmskq $dst, $src" %}
13968
13969 ins_encode %{
13970 __ blsmskq($dst$$Register, $src$$Address);
13971 %}
13972 ins_pipe(ialu_reg_mem);
13973 %}
13974
13975 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13976 %{
13977 match(Set dst (XorL (AddL src minus_1) src));
13978 predicate(UseBMI1Instructions);
13979 effect(KILL cr);
13980 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13981
13982 format %{ "blsmskq $dst, $src" %}
13983
13984 ins_encode %{
13985 __ blsmskq($dst$$Register, $src$$Register);
13986 %}
13987
13988 ins_pipe(ialu_reg);
13989 %}
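
// Identity behind blsmsk (illustration): BLSMSK computes dst = src ^ (src - 1),
// a mask of every bit up to and including the lowest set bit; e.g.
// 0b01101000 ^ 0b01100111 == 0b00001111. The result is nonzero even for
// src == 0 (0 ^ -1 == -1), which is why these rules assert
// Flag_clears_zero_flag.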
13990
13991 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13992 %{
13993 match(Set dst (AndL (AddL src minus_1) src) );
13994 predicate(UseBMI1Instructions);
13995 effect(KILL cr);
13996 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13997
13998 format %{ "blsrq $dst, $src" %}
13999
14000 ins_encode %{
14001 __ blsrq($dst$$Register, $src$$Register);
14002 %}
14003
14004 ins_pipe(ialu_reg);
14005 %}
14006
14007 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
14008 %{
14009 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
14010 predicate(UseBMI1Instructions);
14011 effect(KILL cr);
14012 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
14013
14014 ins_cost(125);
14015 format %{ "blsrq $dst, $src" %}
14016
14017 ins_encode %{
14018 __ blsrq($dst$$Register, $src$$Address);
14019 %}
14020
14021 ins_pipe(ialu_reg);
14022 %}
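
// Identity behind blsr (illustration): BLSR computes dst = src & (src - 1),
// which clears the lowest set bit; e.g. 0b01101000 & 0b01100111 == 0b01100000,
// the classic clear-lowest-set-bit step used in bit-counting loops.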
14023
14024 // Or Instructions
14025 // Or Register with Register
14026 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14027 %{
14028 predicate(!UseAPX);
14029 match(Set dst (OrL dst src));
14030 effect(KILL cr);
14031 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14032
14033 format %{ "orq $dst, $src\t# long" %}
14034 ins_encode %{
14035 __ orq($dst$$Register, $src$$Register);
14036 %}
14037 ins_pipe(ialu_reg_reg);
14038 %}
14039
14040 // Or Register with Register using New Data Destination (NDD)
14041 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14042 %{
14043 predicate(UseAPX);
14044 match(Set dst (OrL src1 src2));
14045 effect(KILL cr);
14046 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14047
14048 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14049 ins_encode %{
__ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
14053 ins_pipe(ialu_reg_reg);
14054 %}
14055
14056 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
predicate(!UseAPX);
match(Set dst (OrL dst (CastP2X src)));
14059 effect(KILL cr);
14060 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14061
14062 format %{ "orq $dst, $src\t# long" %}
14063 ins_encode %{
14064 __ orq($dst$$Register, $src$$Register);
14065 %}
14066 ins_pipe(ialu_reg_reg);
14067 %}
14068
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
predicate(UseAPX);
match(Set dst (OrL src1 (CastP2X src2)));
14071 effect(KILL cr);
14072 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14073
14074 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14075 ins_encode %{
14076 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14077 %}
14078 ins_pipe(ialu_reg_reg);
14079 %}
14080
14081 // Or Register with Immediate
14082 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14083 %{
14084 predicate(!UseAPX);
14085 match(Set dst (OrL dst src));
14086 effect(KILL cr);
14087 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14088
14089 format %{ "orq $dst, $src\t# long" %}
14090 ins_encode %{
14091 __ orq($dst$$Register, $src$$constant);
14092 %}
14093 ins_pipe(ialu_reg);
14094 %}
14095
14096 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14097 %{
14098 predicate(UseAPX);
14099 match(Set dst (OrL src1 src2));
14100 effect(KILL cr);
14101 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14102
14103 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14104 ins_encode %{
14105 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14106 %}
14107 ins_pipe(ialu_reg);
14108 %}
14109
14110 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14111 %{
14112 predicate(UseAPX);
14113 match(Set dst (OrL src1 src2));
14114 effect(KILL cr);
14115 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14116
14117 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14118 ins_encode %{
14119 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14120 %}
14121 ins_pipe(ialu_reg);
14122 %}
14123
// Or Memory with Immediate into Register (NDD)
14125 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14126 %{
14127 predicate(UseAPX);
14128 match(Set dst (OrL (LoadL src1) src2));
14129 effect(KILL cr);
14130 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14131
14132 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14133 ins_encode %{
14134 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14135 %}
14136 ins_pipe(ialu_reg);
14137 %}
14138
14139 // Or Register with Memory
14140 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14141 %{
14142 predicate(!UseAPX);
14143 match(Set dst (OrL dst (LoadL src)));
14144 effect(KILL cr);
14145 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14146
14147 ins_cost(150);
14148 format %{ "orq $dst, $src\t# long" %}
14149 ins_encode %{
14150 __ orq($dst$$Register, $src$$Address);
14151 %}
14152 ins_pipe(ialu_reg_mem);
14153 %}
14154
14155 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14156 %{
14157 predicate(UseAPX);
14158 match(Set dst (OrL src1 (LoadL src2)));
14159 effect(KILL cr);
14160 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14161
14162 ins_cost(150);
14163 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14164 ins_encode %{
14165 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14166 %}
14167 ins_pipe(ialu_reg_mem);
14168 %}
14169
14170 // Or Memory with Register
14171 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14172 %{
14173 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14174 effect(KILL cr);
14175 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14176
14177 ins_cost(150);
14178 format %{ "orq $dst, $src\t# long" %}
14179 ins_encode %{
14180 __ orq($dst$$Address, $src$$Register);
14181 %}
14182 ins_pipe(ialu_mem_reg);
14183 %}
14184
14185 // Or Memory with Immediate
14186 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14187 %{
14188 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14189 effect(KILL cr);
14190 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14191
14192 ins_cost(125);
14193 format %{ "orq $dst, $src\t# long" %}
14194 ins_encode %{
14195 __ orq($dst$$Address, $src$$constant);
14196 %}
14197 ins_pipe(ialu_mem_imm);
14198 %}
14199
14200 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14201 %{
14202 // con should be a pure 64-bit power of 2 immediate
14203 // because AND/OR works well enough for 8/32-bit values.
14204 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14205
14206 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14207 effect(KILL cr);
14208
14209 ins_cost(125);
14210 format %{ "btsq $dst, log2($con)\t# long" %}
14211 ins_encode %{
14212 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14213 %}
14214 ins_pipe(ialu_mem_imm);
14215 %}
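
// Worked example (illustrative, not from the source): for con = 1L << 40,
// log2i_graceful(con) == 40 > 31, so this rule emits
//   btsq [dst], 40   # set bit 40 in place
// instead of loading the 64-bit mask into a register for an orq.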
14216
14217 // Xor Instructions
14218 // Xor Register with Register
14219 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14220 %{
14221 predicate(!UseAPX);
14222 match(Set dst (XorL dst src));
14223 effect(KILL cr);
14224 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14225
14226 format %{ "xorq $dst, $src\t# long" %}
14227 ins_encode %{
14228 __ xorq($dst$$Register, $src$$Register);
14229 %}
14230 ins_pipe(ialu_reg_reg);
14231 %}
14232
14233 // Xor Register with Register using New Data Destination (NDD)
14234 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14235 %{
14236 predicate(UseAPX);
14237 match(Set dst (XorL src1 src2));
14238 effect(KILL cr);
14239 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14240
14241 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14242 ins_encode %{
14243 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14244 %}
14245 ins_pipe(ialu_reg_reg);
14246 %}
14247
14248 // Xor Register with Immediate -1
14249 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14250 %{
14251 predicate(!UseAPX);
14252 match(Set dst (XorL dst imm));
14253
14254 format %{ "notq $dst" %}
14255 ins_encode %{
14256 __ notq($dst$$Register);
14257 %}
14258 ins_pipe(ialu_reg);
14259 %}
14260
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14262 %{
14263 predicate(UseAPX);
14264 match(Set dst (XorL src imm));
14265 flag(PD::Flag_ndd_demotable_opr1);
14266
14267 format %{ "enotq $dst, $src" %}
14268 ins_encode %{
14269 __ enotq($dst$$Register, $src$$Register);
14270 %}
14271 ins_pipe(ialu_reg);
14272 %}
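
// Identity behind the two rules above (illustration): x ^ -1 == ~x, so a
// XorL with -1 becomes a single notq/enotq. NOT does not modify RFLAGS,
// which is why neither rule carries an effect(KILL cr).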
14273
14274 // Xor Register with Immediate
14275 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14276 %{
// Strict predicate check so that xorL_rReg_im1 is selected for immL32 src == -1 regardless of cost.
14278 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14279 match(Set dst (XorL dst src));
14280 effect(KILL cr);
14281 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14282
14283 format %{ "xorq $dst, $src\t# long" %}
14284 ins_encode %{
14285 __ xorq($dst$$Register, $src$$constant);
14286 %}
14287 ins_pipe(ialu_reg);
14288 %}
14289
14290 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14291 %{
// Strict predicate check so that xorL_rReg_im1_ndd is selected for immL32 src2 == -1 regardless of cost.
14293 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14294 match(Set dst (XorL src1 src2));
14295 effect(KILL cr);
14296 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14297
14298 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14299 ins_encode %{
14300 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14301 %}
14302 ins_pipe(ialu_reg);
14303 %}
14304
// Xor Memory with Immediate into Register (NDD)
14306 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14307 %{
14308 predicate(UseAPX);
14309 match(Set dst (XorL (LoadL src1) src2));
14310 effect(KILL cr);
14311 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14312 ins_cost(150);
14313
14314 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14315 ins_encode %{
14316 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14317 %}
14318 ins_pipe(ialu_reg);
14319 %}
14320
14321 // Xor Register with Memory
14322 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14323 %{
14324 predicate(!UseAPX);
14325 match(Set dst (XorL dst (LoadL src)));
14326 effect(KILL cr);
14327 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14328
14329 ins_cost(150);
14330 format %{ "xorq $dst, $src\t# long" %}
14331 ins_encode %{
14332 __ xorq($dst$$Register, $src$$Address);
14333 %}
14334 ins_pipe(ialu_reg_mem);
14335 %}
14336
14337 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14338 %{
14339 predicate(UseAPX);
14340 match(Set dst (XorL src1 (LoadL src2)));
14341 effect(KILL cr);
14342 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14343
14344 ins_cost(150);
14345 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14346 ins_encode %{
14347 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14348 %}
14349 ins_pipe(ialu_reg_mem);
14350 %}
14351
14352 // Xor Memory with Register
14353 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14354 %{
14355 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14356 effect(KILL cr);
14357 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14358
14359 ins_cost(150);
14360 format %{ "xorq $dst, $src\t# long" %}
14361 ins_encode %{
14362 __ xorq($dst$$Address, $src$$Register);
14363 %}
14364 ins_pipe(ialu_mem_reg);
14365 %}
14366
14367 // Xor Memory with Immediate
14368 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14369 %{
14370 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14371 effect(KILL cr);
14372 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14373
14374 ins_cost(125);
14375 format %{ "xorq $dst, $src\t# long" %}
14376 ins_encode %{
14377 __ xorq($dst$$Address, $src$$constant);
14378 %}
14379 ins_pipe(ialu_mem_imm);
14380 %}
14381
14382 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14383 %{
14384 match(Set dst (CmpLTMask p q));
14385 effect(KILL cr);
14386
14387 ins_cost(400);
14388 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14389 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14390 "negl $dst" %}
14391 ins_encode %{
14392 __ cmpl($p$$Register, $q$$Register);
14393 __ setcc(Assembler::less, $dst$$Register);
14394 __ negl($dst$$Register);
14395 %}
14396 ins_pipe(pipe_slow);
14397 %}
14398
14399 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14400 %{
14401 match(Set dst (CmpLTMask dst zero));
14402 effect(KILL cr);
14403
14404 ins_cost(100);
14405 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14406 ins_encode %{
14407 __ sarl($dst$$Register, 31);
14408 %}
14409 ins_pipe(ialu_reg);
14410 %}
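
// What CmpLTMask computes (illustration): dst = (p < q) ? -1 : 0. The general
// rule materializes the 0/1 result of setcc(less) and negates it; the
// dst-vs-zero special case is a pure sign broadcast, since an arithmetic
// right shift copies the sign bit into every position:
//   (-5) >> 31 == -1,   7 >> 31 == 0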
14411
14412 /* Better to save a register than avoid a branch */
14413 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14414 %{
14415 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14416 effect(KILL cr);
14417 ins_cost(300);
14418 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14419 "jge done\n\t"
14420 "addl $p,$y\n"
14421 "done: " %}
14422 ins_encode %{
14423 Register Rp = $p$$Register;
14424 Register Rq = $q$$Register;
14425 Register Ry = $y$$Register;
14426 Label done;
14427 __ subl(Rp, Rq);
14428 __ jccb(Assembler::greaterEqual, done);
14429 __ addl(Rp, Ry);
14430 __ bind(done);
14431 %}
14432 ins_pipe(pipe_cmplt);
14433 %}
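
// Matched identity (illustration): the tree above computes
//   p = (p - q) + ((p < q) ? y : 0)
// Instead of building the -1/0 mask and AND-ing it with y, the encoding
// subtracts and conditionally adds y behind a short forward branch, trading
// a (typically predictable) branch for one fewer live register, as the
// comment above notes.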
14434
14435 /* Better to save a register than avoid a branch */
14436 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14437 %{
14438 match(Set y (AndI (CmpLTMask p q) y));
14439 effect(KILL cr);
14440
14441 ins_cost(300);
14442
14443 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14444 "jlt done\n\t"
14445 "xorl $y, $y\n"
14446 "done: " %}
14447 ins_encode %{
14448 Register Rp = $p$$Register;
14449 Register Rq = $q$$Register;
14450 Register Ry = $y$$Register;
14451 Label done;
14452 __ cmpl(Rp, Rq);
14453 __ jccb(Assembler::less, done);
14454 __ xorl(Ry, Ry);
14455 __ bind(done);
14456 %}
14457 ins_pipe(pipe_cmplt);
14458 %}
14459
14460
14461 //---------- FP Instructions------------------------------------------------
14462
14463 // Really expensive, avoid
14464 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14465 %{
14466 match(Set cr (CmpF src1 src2));
14467
14468 ins_cost(500);
14469 format %{ "ucomiss $src1, $src2\n\t"
14470 "jnp,s exit\n\t"
14471 "pushfq\t# saw NaN, set CF\n\t"
14472 "andq [rsp], #0xffffff2b\n\t"
14473 "popfq\n"
14474 "exit:" %}
14475 ins_encode %{
14476 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14477 emit_cmpfp_fixup(masm);
14478 %}
14479 ins_pipe(pipe_slow);
14480 %}
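
// How the NaN fixup works (sketch, per the ucomiss flag semantics): an
// unordered compare sets ZF = PF = CF = 1. emit_cmpfp_fixup skips the fixup
// when PF == 0; otherwise it ANDs the saved RFLAGS with 0xffffff2b, clearing
// ZF and PF (and AF/SF) while keeping CF, so a NaN operand looks like
// "below" to the unsigned conditions tested afterwards.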
14481
14482 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14483 match(Set cr (CmpF src1 src2));
14484
14485 ins_cost(100);
14486 format %{ "ucomiss $src1, $src2" %}
14487 ins_encode %{
14488 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14489 %}
14490 ins_pipe(pipe_slow);
14491 %}
14492
14493 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14494 match(Set cr (CmpF src1 src2));
14495
14496 ins_cost(100);
14497 format %{ "evucomxss $src1, $src2" %}
14498 ins_encode %{
14499 __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14500 %}
14501 ins_pipe(pipe_slow);
14502 %}
14503
14504 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14505 match(Set cr (CmpF src1 (LoadF src2)));
14506
14507 ins_cost(100);
14508 format %{ "ucomiss $src1, $src2" %}
14509 ins_encode %{
14510 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14511 %}
14512 ins_pipe(pipe_slow);
14513 %}
14514
14515 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14516 match(Set cr (CmpF src1 (LoadF src2)));
14517
14518 ins_cost(100);
14519 format %{ "evucomxss $src1, $src2" %}
14520 ins_encode %{
14521 __ evucomxss($src1$$XMMRegister, $src2$$Address);
14522 %}
14523 ins_pipe(pipe_slow);
14524 %}
14525
14526 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14527 match(Set cr (CmpF src con));
14528
14529 ins_cost(100);
14530 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14531 ins_encode %{
14532 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14533 %}
14534 ins_pipe(pipe_slow);
14535 %}
14536
14537 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14538 match(Set cr (CmpF src con));
14539
14540 ins_cost(100);
14541 format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14542 ins_encode %{
14543 __ evucomxss($src$$XMMRegister, $constantaddress($con));
14544 %}
14545 ins_pipe(pipe_slow);
14546 %}
14547
14548 // Really expensive, avoid
14549 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14550 %{
14551 match(Set cr (CmpD src1 src2));
14552
14553 ins_cost(500);
14554 format %{ "ucomisd $src1, $src2\n\t"
14555 "jnp,s exit\n\t"
14556 "pushfq\t# saw NaN, set CF\n\t"
14557 "andq [rsp], #0xffffff2b\n\t"
14558 "popfq\n"
14559 "exit:" %}
14560 ins_encode %{
14561 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14562 emit_cmpfp_fixup(masm);
14563 %}
14564 ins_pipe(pipe_slow);
14565 %}
14566
14567 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14568 match(Set cr (CmpD src1 src2));
14569
14570 ins_cost(100);
14571 format %{ "ucomisd $src1, $src2 test" %}
14572 ins_encode %{
14573 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14574 %}
14575 ins_pipe(pipe_slow);
14576 %}
14577
14578 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14579 match(Set cr (CmpD src1 src2));
14580
14581 ins_cost(100);
14582 format %{ "evucomxsd $src1, $src2 test" %}
14583 ins_encode %{
14584 __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14585 %}
14586 ins_pipe(pipe_slow);
14587 %}
14588
14589 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14590 match(Set cr (CmpD src1 (LoadD src2)));
14591
14592 ins_cost(100);
14593 format %{ "ucomisd $src1, $src2" %}
14594 ins_encode %{
14595 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14596 %}
14597 ins_pipe(pipe_slow);
14598 %}
14599
14600 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14601 match(Set cr (CmpD src1 (LoadD src2)));
14602
14603 ins_cost(100);
14604 format %{ "evucomxsd $src1, $src2" %}
14605 ins_encode %{
14606 __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14607 %}
14608 ins_pipe(pipe_slow);
14609 %}
14610
14611 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14612 match(Set cr (CmpD src con));
14613 ins_cost(100);
14614 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14615 ins_encode %{
14616 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14617 %}
14618 ins_pipe(pipe_slow);
14619 %}
14620
14621 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14622 match(Set cr (CmpD src con));
14623
14624 ins_cost(100);
14625 format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14626 ins_encode %{
14627 __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14628 %}
14629 ins_pipe(pipe_slow);
14630 %}
14631
14632 // Compare into -1,0,1
14633 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14634 %{
14635 match(Set dst (CmpF3 src1 src2));
14636 effect(KILL cr);
14637
14638 ins_cost(275);
14639 format %{ "ucomiss $src1, $src2\n\t"
14640 "movl $dst, #-1\n\t"
14641 "jp,s done\n\t"
14642 "jb,s done\n\t"
14643 "setne $dst\n\t"
14644 "movzbl $dst, $dst\n"
14645 "done:" %}
14646 ins_encode %{
14647 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14648 emit_cmpfp3(masm, $dst$$Register);
14649 %}
14650 ins_pipe(pipe_slow);
14651 %}
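
// Result mapping (read off the template above): emit_cmpfp3 preloads dst
// with -1 and keeps it on "parity" (unordered) or "below"; otherwise
// setne + movzbl yield 0 for equal and 1 for greater. So dst is -1 for
// less-than or NaN, 0 for equal, 1 for greater -- the three-way value
// CmpF3/CmpD3 expects.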
14652
14653 // Compare into -1,0,1
14654 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14655 %{
14656 match(Set dst (CmpF3 src1 (LoadF src2)));
14657 effect(KILL cr);
14658
14659 ins_cost(275);
14660 format %{ "ucomiss $src1, $src2\n\t"
14661 "movl $dst, #-1\n\t"
14662 "jp,s done\n\t"
14663 "jb,s done\n\t"
14664 "setne $dst\n\t"
14665 "movzbl $dst, $dst\n"
14666 "done:" %}
14667 ins_encode %{
14668 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14669 emit_cmpfp3(masm, $dst$$Register);
14670 %}
14671 ins_pipe(pipe_slow);
14672 %}
14673
14674 // Compare into -1,0,1
14675 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14676 match(Set dst (CmpF3 src con));
14677 effect(KILL cr);
14678
14679 ins_cost(275);
14680 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14681 "movl $dst, #-1\n\t"
14682 "jp,s done\n\t"
14683 "jb,s done\n\t"
14684 "setne $dst\n\t"
14685 "movzbl $dst, $dst\n"
14686 "done:" %}
14687 ins_encode %{
14688 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14689 emit_cmpfp3(masm, $dst$$Register);
14690 %}
14691 ins_pipe(pipe_slow);
14692 %}
14693
14694 // Compare into -1,0,1
14695 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14696 %{
14697 match(Set dst (CmpD3 src1 src2));
14698 effect(KILL cr);
14699
14700 ins_cost(275);
14701 format %{ "ucomisd $src1, $src2\n\t"
14702 "movl $dst, #-1\n\t"
14703 "jp,s done\n\t"
14704 "jb,s done\n\t"
14705 "setne $dst\n\t"
14706 "movzbl $dst, $dst\n"
14707 "done:" %}
14708 ins_encode %{
14709 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14710 emit_cmpfp3(masm, $dst$$Register);
14711 %}
14712 ins_pipe(pipe_slow);
14713 %}
14714
14715 // Compare into -1,0,1
14716 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14717 %{
14718 match(Set dst (CmpD3 src1 (LoadD src2)));
14719 effect(KILL cr);
14720
14721 ins_cost(275);
14722 format %{ "ucomisd $src1, $src2\n\t"
14723 "movl $dst, #-1\n\t"
14724 "jp,s done\n\t"
14725 "jb,s done\n\t"
14726 "setne $dst\n\t"
14727 "movzbl $dst, $dst\n"
14728 "done:" %}
14729 ins_encode %{
14730 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14731 emit_cmpfp3(masm, $dst$$Register);
14732 %}
14733 ins_pipe(pipe_slow);
14734 %}
14735
14736 // Compare into -1,0,1
14737 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14738 match(Set dst (CmpD3 src con));
14739 effect(KILL cr);
14740
14741 ins_cost(275);
14742 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14743 "movl $dst, #-1\n\t"
14744 "jp,s done\n\t"
14745 "jb,s done\n\t"
14746 "setne $dst\n\t"
14747 "movzbl $dst, $dst\n"
14748 "done:" %}
14749 ins_encode %{
14750 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14751 emit_cmpfp3(masm, $dst$$Register);
14752 %}
14753 ins_pipe(pipe_slow);
14754 %}
14755
14756 //----------Arithmetic Conversion Instructions---------------------------------
14757
14758 instruct convF2D_reg_reg(regD dst, regF src)
14759 %{
14760 match(Set dst (ConvF2D src));
14761
14762 format %{ "cvtss2sd $dst, $src" %}
14763 ins_encode %{
14764 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14765 %}
14766 ins_pipe(pipe_slow); // XXX
14767 %}
14768
14769 instruct convF2D_reg_mem(regD dst, memory src)
14770 %{
14771 predicate(UseAVX == 0);
14772 match(Set dst (ConvF2D (LoadF src)));
14773
14774 format %{ "cvtss2sd $dst, $src" %}
14775 ins_encode %{
14776 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14777 %}
14778 ins_pipe(pipe_slow); // XXX
14779 %}
14780
14781 instruct convD2F_reg_reg(regF dst, regD src)
14782 %{
14783 match(Set dst (ConvD2F src));
14784
14785 format %{ "cvtsd2ss $dst, $src" %}
14786 ins_encode %{
14787 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14788 %}
14789 ins_pipe(pipe_slow); // XXX
14790 %}
14791
14792 instruct convD2F_reg_mem(regF dst, memory src)
14793 %{
14794 predicate(UseAVX == 0);
14795 match(Set dst (ConvD2F (LoadD src)));
14796
14797 format %{ "cvtsd2ss $dst, $src" %}
14798 ins_encode %{
14799 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14800 %}
14801 ins_pipe(pipe_slow); // XXX
14802 %}
14803
14804 // XXX do mem variants
14805 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14806 %{
14807 predicate(!VM_Version::supports_avx10_2());
14808 match(Set dst (ConvF2I src));
14809 effect(KILL cr);
14810 format %{ "convert_f2i $dst, $src" %}
14811 ins_encode %{
14812 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14813 %}
14814 ins_pipe(pipe_slow);
14815 %}
14816
14817 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14818 %{
14819 predicate(VM_Version::supports_avx10_2());
14820 match(Set dst (ConvF2I src));
14821 format %{ "evcvttss2sisl $dst, $src" %}
14822 ins_encode %{
14823 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14824 %}
14825 ins_pipe(pipe_slow);
14826 %}
14827
14828 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14829 %{
14830 predicate(VM_Version::supports_avx10_2());
14831 match(Set dst (ConvF2I (LoadF src)));
14832 format %{ "evcvttss2sisl $dst, $src" %}
14833 ins_encode %{
14834 __ evcvttss2sisl($dst$$Register, $src$$Address);
14835 %}
14836 ins_pipe(pipe_slow);
14837 %}
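
// Why the AVX10.2 rules are simpler (inferred from the rules above and
// below): the legacy path goes through convertF2I, which emits a
// compare-and-branch fixup to get Java's NaN and out-of-range semantics --
// hence effect(KILL cr). The saturating evcvtt*2sis* forms produce the
// required result directly, so no fixup or flag kill is needed, and memory
// operand variants become straightforward. The same split applies to the
// F2L/D2I/D2L conversions that follow.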
14838
14839 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14840 %{
14841 predicate(!VM_Version::supports_avx10_2());
14842 match(Set dst (ConvF2L src));
14843 effect(KILL cr);
14844 format %{ "convert_f2l $dst, $src"%}
14845 ins_encode %{
14846 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14847 %}
14848 ins_pipe(pipe_slow);
14849 %}
14850
14851 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14852 %{
14853 predicate(VM_Version::supports_avx10_2());
14854 match(Set dst (ConvF2L src));
14855 format %{ "evcvttss2sisq $dst, $src" %}
14856 ins_encode %{
14857 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14858 %}
14859 ins_pipe(pipe_slow);
14860 %}
14861
14862 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14863 %{
14864 predicate(VM_Version::supports_avx10_2());
14865 match(Set dst (ConvF2L (LoadF src)));
14866 format %{ "evcvttss2sisq $dst, $src" %}
14867 ins_encode %{
14868 __ evcvttss2sisq($dst$$Register, $src$$Address);
14869 %}
14870 ins_pipe(pipe_slow);
14871 %}
14872
14873 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14874 %{
14875 predicate(!VM_Version::supports_avx10_2());
14876 match(Set dst (ConvD2I src));
14877 effect(KILL cr);
14878 format %{ "convert_d2i $dst, $src"%}
14879 ins_encode %{
14880 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14881 %}
14882 ins_pipe(pipe_slow);
14883 %}
14884
14885 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14886 %{
14887 predicate(VM_Version::supports_avx10_2());
14888 match(Set dst (ConvD2I src));
14889 format %{ "evcvttsd2sisl $dst, $src" %}
14890 ins_encode %{
14891 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14892 %}
14893 ins_pipe(pipe_slow);
14894 %}
14895
14896 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14897 %{
14898 predicate(VM_Version::supports_avx10_2());
14899 match(Set dst (ConvD2I (LoadD src)));
14900 format %{ "evcvttsd2sisl $dst, $src" %}
14901 ins_encode %{
14902 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14903 %}
14904 ins_pipe(pipe_slow);
14905 %}
14906
14907 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14908 %{
14909 predicate(!VM_Version::supports_avx10_2());
14910 match(Set dst (ConvD2L src));
14911 effect(KILL cr);
14912 format %{ "convert_d2l $dst, $src"%}
14913 ins_encode %{
14914 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14915 %}
14916 ins_pipe(pipe_slow);
14917 %}
14918
14919 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14920 %{
14921 predicate(VM_Version::supports_avx10_2());
14922 match(Set dst (ConvD2L src));
14923 format %{ "evcvttsd2sisq $dst, $src" %}
14924 ins_encode %{
14925 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14926 %}
14927 ins_pipe(pipe_slow);
14928 %}
14929
14930 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14931 %{
14932 predicate(VM_Version::supports_avx10_2());
14933 match(Set dst (ConvD2L (LoadD src)));
14934 format %{ "evcvttsd2sisq $dst, $src" %}
14935 ins_encode %{
14936 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14937 %}
14938 ins_pipe(pipe_slow);
14939 %}
14940
14941 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14942 %{
14943 match(Set dst (RoundD src));
14944 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14945 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14946 ins_encode %{
14947 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14948 %}
14949 ins_pipe(pipe_slow);
14950 %}
14951
14952 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14953 %{
14954 match(Set dst (RoundF src));
14955 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14956 format %{ "round_float $dst,$src" %}
14957 ins_encode %{
14958 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14959 %}
14960 ins_pipe(pipe_slow);
14961 %}
14962
14963 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14964 %{
14965 predicate(!UseXmmI2F);
14966 match(Set dst (ConvI2F src));
14967
14968 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14969 ins_encode %{
14970 if (UseAVX > 0) {
14971 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14972 }
14973 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14974 %}
14975 ins_pipe(pipe_slow); // XXX
14976 %}
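
// Why the pxor (illustration): cvtsi2ssl writes only the low 32 bits of the
// XMM destination and merges the rest, creating a false dependency on the
// register's previous contents. Zeroing dst first breaks that dependency
// chain on out-of-order cores; the same idiom recurs in the cvtsi2sdl,
// cvtsi2ssq and cvtsi2sdq rules below.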
14977
14978 instruct convI2F_reg_mem(regF dst, memory src)
14979 %{
14980 predicate(UseAVX == 0);
14981 match(Set dst (ConvI2F (LoadI src)));
14982
14983 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14984 ins_encode %{
14985 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14986 %}
14987 ins_pipe(pipe_slow); // XXX
14988 %}
14989
14990 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14991 %{
14992 predicate(!UseXmmI2D);
14993 match(Set dst (ConvI2D src));
14994
14995 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14996 ins_encode %{
14997 if (UseAVX > 0) {
14998 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14999 }
15000 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
15001 %}
15002 ins_pipe(pipe_slow); // XXX
15003 %}
15004
15005 instruct convI2D_reg_mem(regD dst, memory src)
15006 %{
15007 predicate(UseAVX == 0);
15008 match(Set dst (ConvI2D (LoadI src)));
15009
15010 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
15011 ins_encode %{
15012 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
15013 %}
15014 ins_pipe(pipe_slow); // XXX
15015 %}
15016
15017 instruct convXI2F_reg(regF dst, rRegI src)
15018 %{
15019 predicate(UseXmmI2F);
15020 match(Set dst (ConvI2F src));
15021
15022 format %{ "movdl $dst, $src\n\t"
15023 "cvtdq2psl $dst, $dst\t# i2f" %}
15024 ins_encode %{
15025 __ movdl($dst$$XMMRegister, $src$$Register);
15026 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
15027 %}
15028 ins_pipe(pipe_slow); // XXX
15029 %}
15030
15031 instruct convXI2D_reg(regD dst, rRegI src)
15032 %{
15033 predicate(UseXmmI2D);
15034 match(Set dst (ConvI2D src));
15035
15036 format %{ "movdl $dst, $src\n\t"
15037 "cvtdq2pdl $dst, $dst\t# i2d" %}
15038 ins_encode %{
15039 __ movdl($dst$$XMMRegister, $src$$Register);
15040 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
15041 %}
15042 ins_pipe(pipe_slow); // XXX
15043 %}
15044
15045 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
15046 %{
15047 match(Set dst (ConvL2F src));
15048
15049 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15050 ins_encode %{
15051 if (UseAVX > 0) {
15052 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15053 }
15054 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
15055 %}
15056 ins_pipe(pipe_slow); // XXX
15057 %}
15058
15059 instruct convL2F_reg_mem(regF dst, memory src)
15060 %{
15061 predicate(UseAVX == 0);
15062 match(Set dst (ConvL2F (LoadL src)));
15063
15064 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
15065 ins_encode %{
15066 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
15067 %}
15068 ins_pipe(pipe_slow); // XXX
15069 %}
15070
15071 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
15072 %{
15073 match(Set dst (ConvL2D src));
15074
15075 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15076 ins_encode %{
15077 if (UseAVX > 0) {
15078 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
15079 }
15080 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
15081 %}
15082 ins_pipe(pipe_slow); // XXX
15083 %}
15084
15085 instruct convL2D_reg_mem(regD dst, memory src)
15086 %{
15087 predicate(UseAVX == 0);
15088 match(Set dst (ConvL2D (LoadL src)));
15089
15090 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15091 ins_encode %{
15092 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15093 %}
15094 ins_pipe(pipe_slow); // XXX
15095 %}
15096
15097 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15098 %{
15099 match(Set dst (ConvI2L src));
15100
15101 ins_cost(125);
15102 format %{ "movslq $dst, $src\t# i2l" %}
15103 ins_encode %{
15104 __ movslq($dst$$Register, $src$$Register);
15105 %}
15106 ins_pipe(ialu_reg_reg);
15107 %}
15108
15109 // Zero-extend convert int to long
15110 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15111 %{
15112 match(Set dst (AndL (ConvI2L src) mask));
15113
15114 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15115 ins_encode %{
15116 if ($dst$$reg != $src$$reg) {
15117 __ movl($dst$$Register, $src$$Register);
15118 }
15119 %}
15120 ins_pipe(ialu_reg_reg);
15121 %}
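
// Why a plain movl suffices (illustration): writing a 32-bit register zeroes
// bits 63:32, so (ConvI2L src) & 0xFFFFFFFF is just a 32-bit register move --
// and when the allocator assigns src and dst the same register, the guard
// above emits no instruction at all.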
15122
15123 // Zero-extend convert int to long
15124 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15125 %{
15126 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15127
15128 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15129 ins_encode %{
15130 __ movl($dst$$Register, $src$$Address);
15131 %}
15132 ins_pipe(ialu_reg_mem);
15133 %}
15134
15135 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15136 %{
15137 match(Set dst (AndL src mask));
15138
15139 format %{ "movl $dst, $src\t# zero-extend long" %}
15140 ins_encode %{
15141 __ movl($dst$$Register, $src$$Register);
15142 %}
15143 ins_pipe(ialu_reg_reg);
15144 %}
15145
15146 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15147 %{
15148 match(Set dst (ConvL2I src));
15149
15150 format %{ "movl $dst, $src\t# l2i" %}
15151 ins_encode %{
15152 __ movl($dst$$Register, $src$$Register);
15153 %}
15154 ins_pipe(ialu_reg_reg);
15155 %}
15156
15157
15158 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15159 match(Set dst (MoveF2I src));
15160 effect(DEF dst, USE src);
15161
15162 ins_cost(125);
15163 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15164 ins_encode %{
15165 __ movl($dst$$Register, Address(rsp, $src$$disp));
15166 %}
15167 ins_pipe(ialu_reg_mem);
15168 %}
15169
15170 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15171 match(Set dst (MoveI2F src));
15172 effect(DEF dst, USE src);
15173
15174 ins_cost(125);
15175 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15176 ins_encode %{
15177 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15178 %}
15179 ins_pipe(pipe_slow);
15180 %}
15181
15182 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15183 match(Set dst (MoveD2L src));
15184 effect(DEF dst, USE src);
15185
15186 ins_cost(125);
15187 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15188 ins_encode %{
15189 __ movq($dst$$Register, Address(rsp, $src$$disp));
15190 %}
15191 ins_pipe(ialu_reg_mem);
15192 %}
15193
15194 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15195 predicate(!UseXmmLoadAndClearUpper);
15196 match(Set dst (MoveL2D src));
15197 effect(DEF dst, USE src);
15198
15199 ins_cost(125);
15200 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15201 ins_encode %{
15202 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15203 %}
15204 ins_pipe(pipe_slow);
15205 %}
15206
15207 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15208 predicate(UseXmmLoadAndClearUpper);
15209 match(Set dst (MoveL2D src));
15210 effect(DEF dst, USE src);
15211
15212 ins_cost(125);
15213 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15214 ins_encode %{
15215 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15216 %}
15217 ins_pipe(pipe_slow);
15218 %}
15219
15220
15221 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15222 match(Set dst (MoveF2I src));
15223 effect(DEF dst, USE src);
15224
15225 ins_cost(95); // XXX
15226 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15227 ins_encode %{
15228 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15229 %}
15230 ins_pipe(pipe_slow);
15231 %}
15232
15233 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15234 match(Set dst (MoveI2F src));
15235 effect(DEF dst, USE src);
15236
15237 ins_cost(100);
15238 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15239 ins_encode %{
15240 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15241 %}
15242 ins_pipe( ialu_mem_reg );
15243 %}
15244
15245 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15246 match(Set dst (MoveD2L src));
15247 effect(DEF dst, USE src);
15248
15249 ins_cost(95); // XXX
15250 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15251 ins_encode %{
15252 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15253 %}
15254 ins_pipe(pipe_slow);
15255 %}
15256
15257 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15258 match(Set dst (MoveL2D src));
15259 effect(DEF dst, USE src);
15260
15261 ins_cost(100);
15262 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15263 ins_encode %{
15264 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15265 %}
15266 ins_pipe(ialu_mem_reg);
15267 %}
15268
15269 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15270 match(Set dst (MoveF2I src));
15271 effect(DEF dst, USE src);
15272 ins_cost(85);
15273 format %{ "movd $dst,$src\t# MoveF2I" %}
15274 ins_encode %{
15275 __ movdl($dst$$Register, $src$$XMMRegister);
15276 %}
15277 ins_pipe( pipe_slow );
15278 %}
15279
15280 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15281 match(Set dst (MoveD2L src));
15282 effect(DEF dst, USE src);
15283 ins_cost(85);
15284 format %{ "movd $dst,$src\t# MoveD2L" %}
15285 ins_encode %{
15286 __ movdq($dst$$Register, $src$$XMMRegister);
15287 %}
15288 ins_pipe( pipe_slow );
15289 %}
15290
15291 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15292 match(Set dst (MoveI2F src));
15293 effect(DEF dst, USE src);
15294 ins_cost(100);
15295 format %{ "movd $dst,$src\t# MoveI2F" %}
15296 ins_encode %{
15297 __ movdl($dst$$XMMRegister, $src$$Register);
15298 %}
15299 ins_pipe( pipe_slow );
15300 %}
15301
15302 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15303 match(Set dst (MoveL2D src));
15304 effect(DEF dst, USE src);
15305 ins_cost(100);
15306 format %{ "movd $dst,$src\t# MoveL2D" %}
15307 ins_encode %{
15308 __ movdq($dst$$XMMRegister, $src$$Register);
15309 %}
15310 ins_pipe( pipe_slow );
15311 %}
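
// Note (illustration): the four Move*_reg_reg rules above are raw bit-pattern
// moves between the GPR and XMM files (movd/movdq) -- the reinterpret casts
// behind Float.floatToRawIntBits / Float.intBitsToFloat and their Double
// counterparts. No conversion or rounding occurs, only a domain crossing.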
15312
15313
15314 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15316 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15317 Universe dummy, rFlagsReg cr)
15318 %{
15319 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15320 match(Set dummy (ClearArray (Binary cnt base) val));
15321 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15322
15323 format %{ $$template
15324 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15325 $$emit$$"jg LARGE\n\t"
15326 $$emit$$"dec rcx\n\t"
15327 $$emit$$"js DONE\t# Zero length\n\t"
15328 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15329 $$emit$$"dec rcx\n\t"
15330 $$emit$$"jge LOOP\n\t"
15331 $$emit$$"jmp DONE\n\t"
15332 $$emit$$"# LARGE:\n\t"
15333 if (UseFastStosb) {
15334 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15335 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15336 } else if (UseXMMForObjInit) {
15337 $$emit$$"movdq $tmp, $val\n\t"
15338 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15339 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15340 $$emit$$"jmpq L_zero_64_bytes\n\t"
15341 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15342 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15343 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15344 $$emit$$"add 0x40,rax\n\t"
15345 $$emit$$"# L_zero_64_bytes:\n\t"
15346 $$emit$$"sub 0x8,rcx\n\t"
15347 $$emit$$"jge L_loop\n\t"
15348 $$emit$$"add 0x4,rcx\n\t"
15349 $$emit$$"jl L_tail\n\t"
15350 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15351 $$emit$$"add 0x20,rax\n\t"
15352 $$emit$$"sub 0x4,rcx\n\t"
15353 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15354 $$emit$$"add 0x4,rcx\n\t"
15355 $$emit$$"jle L_end\n\t"
15356 $$emit$$"dec rcx\n\t"
15357 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15358 $$emit$$"vmovq xmm0,(rax)\n\t"
15359 $$emit$$"add 0x8,rax\n\t"
15360 $$emit$$"dec rcx\n\t"
15361 $$emit$$"jge L_sloop\n\t"
15362 $$emit$$"# L_end:\n\t"
15363 } else {
15364 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15365 }
15366 $$emit$$"# DONE"
15367 %}
15368 ins_encode %{
15369 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15370 $tmp$$XMMRegister, false, false);
15371 %}
15372 ins_pipe(pipe_slow);
15373 %}
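
// Strategy sketch (read off the template above): clear_mem first compares the
// length against InitArrayShortSize and clears short arrays with a simple
// 8-bytes-per-iteration store loop; larger requests fall back to rep stosb
// (UseFastStosb), a 32-bytes-per-iteration XMM loop (UseXMMForObjInit), or
// rep stosq.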
15374
15375 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15376 Universe dummy, rFlagsReg cr)
15377 %{
15378 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15379 match(Set dummy (ClearArray (Binary cnt base) val));
15380 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15381
15382 format %{ $$template
15383 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15384 $$emit$$"jg LARGE\n\t"
15385 $$emit$$"dec rcx\n\t"
15386 $$emit$$"js DONE\t# Zero length\n\t"
15387 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15388 $$emit$$"dec rcx\n\t"
15389 $$emit$$"jge LOOP\n\t"
15390 $$emit$$"jmp DONE\n\t"
15391 $$emit$$"# LARGE:\n\t"
15392 if (UseXMMForObjInit) {
15393 $$emit$$"movdq $tmp, $val\n\t"
15394 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15395 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15396 $$emit$$"jmpq L_zero_64_bytes\n\t"
15397 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15398 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15399 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15400 $$emit$$"add 0x40,rax\n\t"
15401 $$emit$$"# L_zero_64_bytes:\n\t"
15402 $$emit$$"sub 0x8,rcx\n\t"
15403 $$emit$$"jge L_loop\n\t"
15404 $$emit$$"add 0x4,rcx\n\t"
15405 $$emit$$"jl L_tail\n\t"
15406 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15407 $$emit$$"add 0x20,rax\n\t"
15408 $$emit$$"sub 0x4,rcx\n\t"
15409 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15410 $$emit$$"add 0x4,rcx\n\t"
15411 $$emit$$"jle L_end\n\t"
15412 $$emit$$"dec rcx\n\t"
15413 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15414 $$emit$$"vmovq xmm0,(rax)\n\t"
15415 $$emit$$"add 0x8,rax\n\t"
15416 $$emit$$"dec rcx\n\t"
15417 $$emit$$"jge L_sloop\n\t"
15418 $$emit$$"# L_end:\n\t"
15419 } else {
15420 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15421 }
15422 $$emit$$"# DONE"
15423 %}
15424 ins_encode %{
15425 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15426 $tmp$$XMMRegister, false, true);
15427 %}
15428 ins_pipe(pipe_slow);
15429 %}
15430
15431 // Small non-constant length ClearArray for AVX512 targets.
15432 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15433 Universe dummy, rFlagsReg cr)
15434 %{
15435 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15436 match(Set dummy (ClearArray (Binary cnt base) val));
15437 ins_cost(125);
15438 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15439
15440 format %{ $$template
15441 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15442 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15443 $$emit$$"jg LARGE\n\t"
15444 $$emit$$"dec rcx\n\t"
15445 $$emit$$"js DONE\t# Zero length\n\t"
15446 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15447 $$emit$$"dec rcx\n\t"
15448 $$emit$$"jge LOOP\n\t"
15449 $$emit$$"jmp DONE\n\t"
15450 $$emit$$"# LARGE:\n\t"
15451 if (UseFastStosb) {
15452 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15453 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15454 } else if (UseXMMForObjInit) {
15455 $$emit$$"mov rdi,rax\n\t"
15456 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15457 $$emit$$"jmpq L_zero_64_bytes\n\t"
15458 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15459 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15460 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15461 $$emit$$"add 0x40,rax\n\t"
15462 $$emit$$"# L_zero_64_bytes:\n\t"
15463 $$emit$$"sub 0x8,rcx\n\t"
15464 $$emit$$"jge L_loop\n\t"
15465 $$emit$$"add 0x4,rcx\n\t"
15466 $$emit$$"jl L_tail\n\t"
15467 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15468 $$emit$$"add 0x20,rax\n\t"
15469 $$emit$$"sub 0x4,rcx\n\t"
15470 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15471 $$emit$$"add 0x4,rcx\n\t"
15472 $$emit$$"jle L_end\n\t"
15473 $$emit$$"dec rcx\n\t"
15474 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15475 $$emit$$"vmovq xmm0,(rax)\n\t"
15476 $$emit$$"add 0x8,rax\n\t"
15477 $$emit$$"dec rcx\n\t"
15478 $$emit$$"jge L_sloop\n\t"
15479 $$emit$$"# L_end:\n\t"
15480 } else {
15481 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15482 }
15483 $$emit$$"# DONE"
15484 %}
15485 ins_encode %{
15486 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15487 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
15488 %}
15489 ins_pipe(pipe_slow);
15490 %}
15491
15492 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15493 Universe dummy, rFlagsReg cr)
15494 %{
15495 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15496 match(Set dummy (ClearArray (Binary cnt base) val));
15497 ins_cost(125);
15498 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15499
15500 format %{ $$template
15501 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15502 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15503 $$emit$$"jg LARGE\n\t"
15504 $$emit$$"dec rcx\n\t"
15505 $$emit$$"js DONE\t# Zero length\n\t"
15506 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15507 $$emit$$"dec rcx\n\t"
15508 $$emit$$"jge LOOP\n\t"
15509 $$emit$$"jmp DONE\n\t"
15510 $$emit$$"# LARGE:\n\t"
15511 if (UseFastStosb) {
15512 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15513 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15514 } else if (UseXMMForObjInit) {
15515 $$emit$$"mov rdi,rax\n\t"
15516 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15517 $$emit$$"jmpq L_zero_64_bytes\n\t"
15518 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15519 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15520 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15521 $$emit$$"add 0x40,rax\n\t"
15522 $$emit$$"# L_zero_64_bytes:\n\t"
15523 $$emit$$"sub 0x8,rcx\n\t"
15524 $$emit$$"jge L_loop\n\t"
15525 $$emit$$"add 0x4,rcx\n\t"
15526 $$emit$$"jl L_tail\n\t"
15527 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15528 $$emit$$"add 0x20,rax\n\t"
15529 $$emit$$"sub 0x4,rcx\n\t"
15530 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15531 $$emit$$"add 0x4,rcx\n\t"
15532 $$emit$$"jle L_end\n\t"
15533 $$emit$$"dec rcx\n\t"
15534 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15535 $$emit$$"vmovq xmm0,(rax)\n\t"
15536 $$emit$$"add 0x8,rax\n\t"
15537 $$emit$$"dec rcx\n\t"
15538 $$emit$$"jge L_sloop\n\t"
15539 $$emit$$"# L_end:\n\t"
15540 } else {
15541 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15542 }
15543 $$emit$$"# DONE"
15544 %}
15545 ins_encode %{
15546 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15547 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
15548 %}
15549 ins_pipe(pipe_slow);
15550 %}
15551
15552 // Large non-constant length ClearArray for non-AVX512 targets.
15553 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15554 Universe dummy, rFlagsReg cr)
15555 %{
15556 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15557 match(Set dummy (ClearArray (Binary cnt base) val));
15558 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15559
15560 format %{ $$template
15561 if (UseFastStosb) {
15562 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15563 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15564 } else if (UseXMMForObjInit) {
15565 $$emit$$"movdq $tmp, $val\n\t"
15566 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15567 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15568 $$emit$$"jmpq L_zero_64_bytes\n\t"
15569 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15570 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15571 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15572 $$emit$$"add 0x40,rax\n\t"
15573 $$emit$$"# L_zero_64_bytes:\n\t"
15574 $$emit$$"sub 0x8,rcx\n\t"
15575 $$emit$$"jge L_loop\n\t"
15576 $$emit$$"add 0x4,rcx\n\t"
15577 $$emit$$"jl L_tail\n\t"
15578 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15579 $$emit$$"add 0x20,rax\n\t"
15580 $$emit$$"sub 0x4,rcx\n\t"
15581 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15582 $$emit$$"add 0x4,rcx\n\t"
15583 $$emit$$"jle L_end\n\t"
15584 $$emit$$"dec rcx\n\t"
15585 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15586 $$emit$$"vmovq xmm0,(rax)\n\t"
15587 $$emit$$"add 0x8,rax\n\t"
15588 $$emit$$"dec rcx\n\t"
15589 $$emit$$"jge L_sloop\n\t"
15590 $$emit$$"# L_end:\n\t"
15591 } else {
15592 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15593 }
15594 %}
15595 ins_encode %{
15596 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15597 $tmp$$XMMRegister, true, false);
15598 %}
15599 ins_pipe(pipe_slow);
15600 %}
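// UseFastStosb gates the ERMSB ("fast string") path: the word count is
// converted to bytes so a single "rep stosb" can run at the hardware's
// optimized string-store rate; without it the fill falls back to XMM
// stores or "rep stosq".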
15601
15602 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
15603 Universe dummy, rFlagsReg cr)
15604 %{
15605 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
15606 match(Set dummy (ClearArray (Binary cnt base) val));
15607 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
15608
15609 format %{ $$template
15610 if (UseXMMForObjInit) {
15611 $$emit$$"movdq $tmp, $val\n\t"
15612 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
15613 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
15614 $$emit$$"jmpq L_zero_64_bytes\n\t"
15615 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15616 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15617 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
15618 $$emit$$"add 0x40,rax\n\t"
15619 $$emit$$"# L_zero_64_bytes:\n\t"
15620 $$emit$$"sub 0x8,rcx\n\t"
15621 $$emit$$"jge L_loop\n\t"
15622 $$emit$$"add 0x4,rcx\n\t"
15623 $$emit$$"jl L_tail\n\t"
15624 $$emit$$"vmovdqu $tmp,(rax)\n\t"
15625 $$emit$$"add 0x20,rax\n\t"
15626 $$emit$$"sub 0x4,rcx\n\t"
15627 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15628 $$emit$$"add 0x4,rcx\n\t"
15629 $$emit$$"jle L_end\n\t"
15630 $$emit$$"dec rcx\n\t"
15631 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15632 $$emit$$"vmovq xmm0,(rax)\n\t"
15633 $$emit$$"add 0x8,rax\n\t"
15634 $$emit$$"dec rcx\n\t"
15635 $$emit$$"jge L_sloop\n\t"
15636 $$emit$$"# L_end:\n\t"
15637 } else {
15638 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15639 }
15640 %}
15641 ins_encode %{
15642 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15643 $tmp$$XMMRegister, true, true);
15644 %}
15645 ins_pipe(pipe_slow);
15646 %}
15647
15648 // Large non-constant length ClearArray for AVX512 targets.
15649 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15650 Universe dummy, rFlagsReg cr)
15651 %{
15652 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15653 match(Set dummy (ClearArray (Binary cnt base) val));
15654 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15655
15656 format %{ $$template
15657 if (UseFastStosb) {
15658 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15659 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15660 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15661 } else if (UseXMMForObjInit) {
15662 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15663 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15664 $$emit$$"jmpq L_zero_64_bytes\n\t"
15665 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15666 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15667 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15668 $$emit$$"add 0x40,rax\n\t"
15669 $$emit$$"# L_zero_64_bytes:\n\t"
15670 $$emit$$"sub 0x8,rcx\n\t"
15671 $$emit$$"jge L_loop\n\t"
15672 $$emit$$"add 0x4,rcx\n\t"
15673 $$emit$$"jl L_tail\n\t"
15674 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15675 $$emit$$"add 0x20,rax\n\t"
15676 $$emit$$"sub 0x4,rcx\n\t"
15677 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15678 $$emit$$"add 0x4,rcx\n\t"
15679 $$emit$$"jle L_end\n\t"
15680 $$emit$$"dec rcx\n\t"
15681 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15682 $$emit$$"vmovq xmm0,(rax)\n\t"
15683 $$emit$$"add 0x8,rax\n\t"
15684 $$emit$$"dec rcx\n\t"
15685 $$emit$$"jge L_sloop\n\t"
15686 $$emit$$"# L_end:\n\t"
15687 } else {
15688 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15689 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15690 }
15691 %}
15692 ins_encode %{
15693 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15694 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
15695 %}
15696 ins_pipe(pipe_slow);
15697 %}
15698
15699 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
15700 Universe dummy, rFlagsReg cr)
15701 %{
15702 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
15703 match(Set dummy (ClearArray (Binary cnt base) val));
15704 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
15705
15706 format %{ $$template
15707 if (UseFastStosb) {
15708 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15709 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15710 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15711 } else if (UseXMMForObjInit) {
15712 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15713 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15714 $$emit$$"jmpq L_zero_64_bytes\n\t"
15715 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15716 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15717 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15718 $$emit$$"add 0x40,rax\n\t"
15719 $$emit$$"# L_zero_64_bytes:\n\t"
15720 $$emit$$"sub 0x8,rcx\n\t"
15721 $$emit$$"jge L_loop\n\t"
15722 $$emit$$"add 0x4,rcx\n\t"
15723 $$emit$$"jl L_tail\n\t"
15724 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15725 $$emit$$"add 0x20,rax\n\t"
15726 $$emit$$"sub 0x4,rcx\n\t"
15727 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15728 $$emit$$"add 0x4,rcx\n\t"
15729 $$emit$$"jle L_end\n\t"
15730 $$emit$$"dec rcx\n\t"
15731 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15732 $$emit$$"vmovq xmm0,(rax)\n\t"
15733 $$emit$$"add 0x8,rax\n\t"
15734 $$emit$$"dec rcx\n\t"
15735 $$emit$$"jge L_sloop\n\t"
15736 $$emit$$"# L_end:\n\t"
15737 } else {
15738 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15739 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15740 }
15741 %}
15742 ins_encode %{
15743 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
15744 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
15745 %}
15746 ins_pipe(pipe_slow);
15747 %}
15748
15749 // Small constant length ClearArray for AVX512 targets.
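// With an immediate count, clear_mem can emit a straight-line sequence of
// stores (masked via ktmp for the AVX-512 tail) instead of a counted loop,
// which is why this form carries the lowest cost of the ClearArray variants.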
15750 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
15751 %{
15752 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
15753 ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
15754 match(Set dummy (ClearArray (Binary cnt base) val));
15755 ins_cost(100);
15756 effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
15758 ins_encode %{
15759 __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15760 %}
15761 ins_pipe(pipe_slow);
15762 %}
15763
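// The string and array intrinsics below come in pairs: a baseline form
// that passes knoreg (the "no mask register" sentinel) and an _evex form,
// predicated on AVX-512 VL+BW support, whose kReg temps let the
// macroassembler use masked vector loops for heads and tails.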
15764 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15765 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15766 %{
15767 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15768 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15769 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15770
15771 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15772 ins_encode %{
15773 __ string_compare($str1$$Register, $str2$$Register,
15774 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15775 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15776 %}
15777 ins_pipe( pipe_slow );
15778 %}
15779
15780 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15781 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15782 %{
15783 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15784 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15785 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15786
15787 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15788 ins_encode %{
15789 __ string_compare($str1$$Register, $str2$$Register,
15790 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15791 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15792 %}
15793 ins_pipe( pipe_slow );
15794 %}
15795
15796 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15797 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15798 %{
15799 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15800 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15801 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15802
15803 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15804 ins_encode %{
15805 __ string_compare($str1$$Register, $str2$$Register,
15806 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15807 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15808 %}
15809 ins_pipe( pipe_slow );
15810 %}
15811
15812 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15813 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15814 %{
15815 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15816 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15817 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15818
15819 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15820 ins_encode %{
15821 __ string_compare($str1$$Register, $str2$$Register,
15822 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15823 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15824 %}
15825 ins_pipe( pipe_slow );
15826 %}
15827
15828 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15829 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15830 %{
15831 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15832 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15833 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15834
15835 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15836 ins_encode %{
15837 __ string_compare($str1$$Register, $str2$$Register,
15838 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15839 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15840 %}
15841 ins_pipe( pipe_slow );
15842 %}
15843
15844 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15845 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15846 %{
15847 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15848 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15849 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15850
15851 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15852 ins_encode %{
15853 __ string_compare($str1$$Register, $str2$$Register,
15854 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15855 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15856 %}
15857 ins_pipe( pipe_slow );
15858 %}
15859
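// Note the swapped operand order in the UL encodings below: str2/cnt2 are
// passed first, so the macroassembler sees the Latin-1 operand in the same
// position as in the LU case, and the UL encoding flags the swap so the
// result sign can be fixed up.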
15860 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15861 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15862 %{
15863 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15864 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15865 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15866
15867 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15868 ins_encode %{
15869 __ string_compare($str2$$Register, $str1$$Register,
15870 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15871 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15872 %}
15873 ins_pipe( pipe_slow );
15874 %}
15875
15876 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15877 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15878 %{
15879 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15880 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15881 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15882
15883 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15884 ins_encode %{
15885 __ string_compare($str2$$Register, $str1$$Register,
15886 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15887 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15888 %}
15889 ins_pipe( pipe_slow );
15890 %}
15891
15892 // fast search of substring with known size.
15893 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15894 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15895 %{
15896 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15897 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15898 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15899
15900 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15901 ins_encode %{
15902 int icnt2 = (int)$int_cnt2$$constant;
15903 if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
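      // 16 Latin-1 bytes fill one 16-byte XMM register, so the pcmpestri
      // loop in string_indexofC8 can keep the whole needle in a register.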
15906 __ string_indexofC8($str1$$Register, $str2$$Register,
15907 $cnt1$$Register, $cnt2$$Register,
15908 icnt2, $result$$Register,
15909 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15910 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15912 __ string_indexof($str1$$Register, $str2$$Register,
15913 $cnt1$$Register, $cnt2$$Register,
15914 icnt2, $result$$Register,
15915 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15916 }
15917 %}
15918 ins_pipe( pipe_slow );
15919 %}
15920
15921 // fast search of substring with known size.
15922 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15923 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15924 %{
15925 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15926 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15927 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15928
15929 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15930 ins_encode %{
15931 int icnt2 = (int)$int_cnt2$$constant;
15932 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
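      // 8 UTF-16 chars fill one 16-byte XMM register, matching the
      // 16-element Latin-1 threshold in the LL variant above.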
15935 __ string_indexofC8($str1$$Register, $str2$$Register,
15936 $cnt1$$Register, $cnt2$$Register,
15937 icnt2, $result$$Register,
15938 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15939 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15941 __ string_indexof($str1$$Register, $str2$$Register,
15942 $cnt1$$Register, $cnt2$$Register,
15943 icnt2, $result$$Register,
15944 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15945 }
15946 %}
15947 ins_pipe( pipe_slow );
15948 %}
15949
15950 // fast search of substring with known size.
15951 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15952 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15953 %{
15954 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15955 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15956 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15957
15958 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15959 ins_encode %{
15960 int icnt2 = (int)$int_cnt2$$constant;
15961 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15964 __ string_indexofC8($str1$$Register, $str2$$Register,
15965 $cnt1$$Register, $cnt2$$Register,
15966 icnt2, $result$$Register,
15967 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15968 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15970 __ string_indexof($str1$$Register, $str2$$Register,
15971 $cnt1$$Register, $cnt2$$Register,
15972 icnt2, $result$$Register,
15973 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15974 }
15975 %}
15976 ins_pipe( pipe_slow );
15977 %}
15978
15979 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15980 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15981 %{
15982 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15983 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15984 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15985
15986 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15987 ins_encode %{
15988 __ string_indexof($str1$$Register, $str2$$Register,
15989 $cnt1$$Register, $cnt2$$Register,
15990 (-1), $result$$Register,
15991 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15992 %}
15993 ins_pipe( pipe_slow );
15994 %}
15995
15996 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15997 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15998 %{
15999 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
16000 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16001 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16002
16003 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
16004 ins_encode %{
16005 __ string_indexof($str1$$Register, $str2$$Register,
16006 $cnt1$$Register, $cnt2$$Register,
16007 (-1), $result$$Register,
16008 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
16009 %}
16010 ins_pipe( pipe_slow );
16011 %}
16012
16013 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
16014 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
16015 %{
16016 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
16017 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16018 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
16019
16020 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
16021 ins_encode %{
16022 __ string_indexof($str1$$Register, $str2$$Register,
16023 $cnt1$$Register, $cnt2$$Register,
16024 (-1), $result$$Register,
16025 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
16026 %}
16027 ins_pipe( pipe_slow );
16028 %}
16029
16030 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16031 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16032 %{
16033 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
16034 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16035 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16036 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
16037 ins_encode %{
16038 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16039 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16040 %}
16041 ins_pipe( pipe_slow );
16042 %}
16043
16044 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
16045 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
16046 %{
16047 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
16048 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
16049 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
16050 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
16051 ins_encode %{
16052 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
16053 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
16054 %}
16055 ins_pipe( pipe_slow );
16056 %}
16057
16058 // fast string equals
16059 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16060 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16061 %{
16062 predicate(!VM_Version::supports_avx512vlbw());
16063 match(Set result (StrEquals (Binary str1 str2) cnt));
16064 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16065
16066 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16067 ins_encode %{
16068 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16069 $cnt$$Register, $result$$Register, $tmp3$$Register,
16070 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16071 %}
16072 ins_pipe( pipe_slow );
16073 %}
16074
16075 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
16076 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
16077 %{
16078 predicate(VM_Version::supports_avx512vlbw());
16079 match(Set result (StrEquals (Binary str1 str2) cnt));
16080 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
16081
16082 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16083 ins_encode %{
16084 __ arrays_equals(false, $str1$$Register, $str2$$Register,
16085 $cnt$$Register, $result$$Register, $tmp3$$Register,
16086 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16087 %}
16088 ins_pipe( pipe_slow );
16089 %}
16090
16091 // fast array equals
16092 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16093 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16094 %{
16095 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16096 match(Set result (AryEq ary1 ary2));
16097 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16098
16099 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16100 ins_encode %{
16101 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16102 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16103 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
16104 %}
16105 ins_pipe( pipe_slow );
16106 %}
16107
16108 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16109 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16110 %{
16111 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
16112 match(Set result (AryEq ary1 ary2));
16113 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16114
16115 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16116 ins_encode %{
16117 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16118 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16119 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
16120 %}
16121 ins_pipe( pipe_slow );
16122 %}
16123
16124 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16125 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16126 %{
16127 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16128 match(Set result (AryEq ary1 ary2));
16129 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16130
16131 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16132 ins_encode %{
16133 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16134 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16135 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
16136 %}
16137 ins_pipe( pipe_slow );
16138 %}
16139
16140 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
16141 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
16142 %{
16143 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
16144 match(Set result (AryEq ary1 ary2));
16145 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
16146
16147 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
16148 ins_encode %{
16149 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
16150 $tmp3$$Register, $result$$Register, $tmp4$$Register,
16151 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
16152 %}
16153 ins_pipe( pipe_slow );
16154 %}
16155
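// Vectorized polynomial hash: the 13 XMM temps give arrays_hashcode room
// for unrolled accumulator lanes plus vectors of precomputed powers of 31;
// basic_type selects the element width to load.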
16156 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
16157 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
16158 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
16159 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
16160 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
16161 %{
16162 predicate(UseAVX >= 2);
16163 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
16164 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
16165 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
16166 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
16167 USE basic_type, KILL cr);
16168
16169 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
16170 ins_encode %{
16171 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
16172 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
16173 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
16174 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
16175 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
16176 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
16177 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
16178 %}
16179 ins_pipe( pipe_slow );
16180 %}
16181
16182 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
16184 %{
16185 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16186 match(Set result (CountPositives ary1 len));
16187 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16188
16189 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16190 ins_encode %{
16191 __ count_positives($ary1$$Register, $len$$Register,
16192 $result$$Register, $tmp3$$Register,
16193 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
16194 %}
16195 ins_pipe( pipe_slow );
16196 %}
16197
16198 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
16200 %{
16201 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16202 match(Set result (CountPositives ary1 len));
16203 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
16204
16205 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
16206 ins_encode %{
16207 __ count_positives($ary1$$Register, $len$$Register,
16208 $result$$Register, $tmp3$$Register,
16209 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
16210 %}
16211 ins_pipe( pipe_slow );
16212 %}
16213
16214 // fast char[] to byte[] compression
16215 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16216 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16217 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16218 match(Set result (StrCompressedCopy src (Binary dst len)));
16219 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
16220 USE_KILL len, KILL tmp5, KILL cr);
16221
16222 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16223 ins_encode %{
16224 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16225 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16226 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16227 knoreg, knoreg);
16228 %}
16229 ins_pipe( pipe_slow );
16230 %}
16231
16232 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
16233 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16234 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16235 match(Set result (StrCompressedCopy src (Binary dst len)));
16236 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
16237 USE_KILL len, KILL tmp5, KILL cr);
16238
16239 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
16240 ins_encode %{
16241 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
16242 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16243 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
16244 $ktmp1$$KRegister, $ktmp2$$KRegister);
16245 %}
16246 ins_pipe( pipe_slow );
16247 %}

// fast byte[] to char[] inflation
16249 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16250 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
16251 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
16252 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16253 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16254
16255 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16256 ins_encode %{
16257 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16258 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
16259 %}
16260 ins_pipe( pipe_slow );
16261 %}
16262
16263 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16264 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
16265 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
16266 match(Set dummy (StrInflatedCopy src (Binary dst len)));
16267 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
16268
16269 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
16270 ins_encode %{
16271 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
16272 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
16273 %}
16274 ins_pipe( pipe_slow );
16275 %}
16276
16277 // encode char[] to byte[] in ISO_8859_1
16278 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16279 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16280 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16281 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
16282 match(Set result (EncodeISOArray src (Binary dst len)));
16283 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16284
16285 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16286 ins_encode %{
16287 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16288 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16289 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
16290 %}
16291 ins_pipe( pipe_slow );
16292 %}
16293
16294 // encode char[] to byte[] in ASCII
16295 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
16296 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16297 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16298 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16299 match(Set result (EncodeISOArray src (Binary dst len)));
16300 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16301
16302 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16303 ins_encode %{
16304 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16305 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16306 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16307 %}
16308 ins_pipe( pipe_slow );
16309 %}
16310
16311 //----------Overflow Math Instructions-----------------------------------------
16312
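// These productions define only a flags result for Overflow* nodes; the
// consumer branches on the overflow (or carry) condition. For example, an
// OverflowAddI check becomes roughly:
//
//   addl op1, op2   # sets OF on signed int overflow
//   jo   slow_path  # taken when the add wrapped
//
// op1 is USE_KILL because addl overwrites it; the sum itself, when needed,
// comes from a separate AddI node.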
16313 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16314 %{
16315 match(Set cr (OverflowAddI op1 op2));
16316 effect(DEF cr, USE_KILL op1, USE op2);
16317
16318 format %{ "addl $op1, $op2\t# overflow check int" %}
16319
16320 ins_encode %{
16321 __ addl($op1$$Register, $op2$$Register);
16322 %}
16323 ins_pipe(ialu_reg_reg);
16324 %}
16325
16326 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16327 %{
16328 match(Set cr (OverflowAddI op1 op2));
16329 effect(DEF cr, USE_KILL op1, USE op2);
16330
16331 format %{ "addl $op1, $op2\t# overflow check int" %}
16332
16333 ins_encode %{
16334 __ addl($op1$$Register, $op2$$constant);
16335 %}
16336 ins_pipe(ialu_reg_reg);
16337 %}
16338
16339 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16340 %{
16341 match(Set cr (OverflowAddL op1 op2));
16342 effect(DEF cr, USE_KILL op1, USE op2);
16343
16344 format %{ "addq $op1, $op2\t# overflow check long" %}
16345 ins_encode %{
16346 __ addq($op1$$Register, $op2$$Register);
16347 %}
16348 ins_pipe(ialu_reg_reg);
16349 %}
16350
16351 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16352 %{
16353 match(Set cr (OverflowAddL op1 op2));
16354 effect(DEF cr, USE_KILL op1, USE op2);
16355
16356 format %{ "addq $op1, $op2\t# overflow check long" %}
16357 ins_encode %{
16358 __ addq($op1$$Register, $op2$$constant);
16359 %}
16360 ins_pipe(ialu_reg_reg);
16361 %}
16362
16363 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16364 %{
16365 match(Set cr (OverflowSubI op1 op2));
16366
16367 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16368 ins_encode %{
16369 __ cmpl($op1$$Register, $op2$$Register);
16370 %}
16371 ins_pipe(ialu_reg_reg);
16372 %}
16373
16374 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16375 %{
16376 match(Set cr (OverflowSubI op1 op2));
16377
16378 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16379 ins_encode %{
16380 __ cmpl($op1$$Register, $op2$$constant);
16381 %}
16382 ins_pipe(ialu_reg_reg);
16383 %}
16384
16385 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16386 %{
16387 match(Set cr (OverflowSubL op1 op2));
16388
16389 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16390 ins_encode %{
16391 __ cmpq($op1$$Register, $op2$$Register);
16392 %}
16393 ins_pipe(ialu_reg_reg);
16394 %}
16395
16396 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16397 %{
16398 match(Set cr (OverflowSubL op1 op2));
16399
16400 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16401 ins_encode %{
16402 __ cmpq($op1$$Register, $op2$$constant);
16403 %}
16404 ins_pipe(ialu_reg_reg);
16405 %}
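// The subtraction checks above use cmp rather than sub: cmp performs the
// subtraction for flags only, so both inputs stay live and no USE_KILL
// effect is needed.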
16406
16407 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16408 %{
16409 match(Set cr (OverflowSubI zero op2));
16410 effect(DEF cr, USE_KILL op2);
16411
16412 format %{ "negl $op2\t# overflow check int" %}
16413 ins_encode %{
16414 __ negl($op2$$Register);
16415 %}
16416 ins_pipe(ialu_reg_reg);
16417 %}
16418
16419 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16420 %{
16421 match(Set cr (OverflowSubL zero op2));
16422 effect(DEF cr, USE_KILL op2);
16423
16424 format %{ "negq $op2\t# overflow check long" %}
16425 ins_encode %{
16426 __ negq($op2$$Register);
16427 %}
16428 ins_pipe(ialu_reg_reg);
16429 %}
16430
16431 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16432 %{
16433 match(Set cr (OverflowMulI op1 op2));
16434 effect(DEF cr, USE_KILL op1, USE op2);
16435
16436 format %{ "imull $op1, $op2\t# overflow check int" %}
16437 ins_encode %{
16438 __ imull($op1$$Register, $op2$$Register);
16439 %}
16440 ins_pipe(ialu_reg_reg_alu0);
16441 %}
16442
16443 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16444 %{
16445 match(Set cr (OverflowMulI op1 op2));
16446 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16447
16448 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16449 ins_encode %{
16450 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16451 %}
16452 ins_pipe(ialu_reg_reg_alu0);
16453 %}
16454
16455 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16456 %{
16457 match(Set cr (OverflowMulL op1 op2));
16458 effect(DEF cr, USE_KILL op1, USE op2);
16459
16460 format %{ "imulq $op1, $op2\t# overflow check long" %}
16461 ins_encode %{
16462 __ imulq($op1$$Register, $op2$$Register);
16463 %}
16464 ins_pipe(ialu_reg_reg_alu0);
16465 %}
16466
16467 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16468 %{
16469 match(Set cr (OverflowMulL op1 op2));
16470 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16471
16472 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16473 ins_encode %{
16474 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16475 %}
16476 ins_pipe(ialu_reg_reg_alu0);
16477 %}
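// The immediate multiply forms use the three-operand imul into a TEMP, so
// neither input is clobbered; only the flags are consumed here, and the
// product, if actually used, comes from the matching multiply node.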
16478
16479
16480 //----------Control Flow Instructions------------------------------------------
16481 // Signed compare Instructions
16482
16483 // XXX more variants!!
16484 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16485 %{
16486 match(Set cr (CmpI op1 op2));
16487 effect(DEF cr, USE op1, USE op2);
16488
16489 format %{ "cmpl $op1, $op2" %}
16490 ins_encode %{
16491 __ cmpl($op1$$Register, $op2$$Register);
16492 %}
16493 ins_pipe(ialu_cr_reg_reg);
16494 %}
16495
16496 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16497 %{
16498 match(Set cr (CmpI op1 op2));
16499
16500 format %{ "cmpl $op1, $op2" %}
16501 ins_encode %{
16502 __ cmpl($op1$$Register, $op2$$constant);
16503 %}
16504 ins_pipe(ialu_cr_reg_imm);
16505 %}
16506
16507 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16508 %{
16509 match(Set cr (CmpI op1 (LoadI op2)));
16510
16511 ins_cost(500); // XXX
16512 format %{ "cmpl $op1, $op2" %}
16513 ins_encode %{
16514 __ cmpl($op1$$Register, $op2$$Address);
16515 %}
16516 ins_pipe(ialu_cr_reg_mem);
16517 %}
16518
16519 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16520 %{
16521 match(Set cr (CmpI src zero));
16522
16523 format %{ "testl $src, $src" %}
16524 ins_encode %{
16525 __ testl($src$$Register, $src$$Register);
16526 %}
16527 ins_pipe(ialu_cr_reg_imm);
16528 %}
16529
16530 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16531 %{
16532 match(Set cr (CmpI (AndI src con) zero));
16533
16534 format %{ "testl $src, $con" %}
16535 ins_encode %{
16536 __ testl($src$$Register, $con$$constant);
16537 %}
16538 ins_pipe(ialu_cr_reg_imm);
16539 %}
16540
16541 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16542 %{
16543 match(Set cr (CmpI (AndI src1 src2) zero));
16544
16545 format %{ "testl $src1, $src2" %}
16546 ins_encode %{
16547 __ testl($src1$$Register, $src2$$Register);
16548 %}
16549 ins_pipe(ialu_cr_reg_imm);
16550 %}
16551
16552 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16553 %{
16554 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16555
16556 format %{ "testl $src, $mem" %}
16557 ins_encode %{
16558 __ testl($src$$Register, $mem$$Address);
16559 %}
16560 ins_pipe(ialu_cr_reg_mem);
16561 %}
16562
16563 // Unsigned compare Instructions; really, same as signed except they
16564 // produce an rFlagsRegU instead of rFlagsReg.
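// The emitted instruction is identical to the signed form; only the flags
// register class changes, restricting consumers to unsigned conditions
// (jb/jae rather than jl/jge).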
16565 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16566 %{
16567 match(Set cr (CmpU op1 op2));
16568
16569 format %{ "cmpl $op1, $op2\t# unsigned" %}
16570 ins_encode %{
16571 __ cmpl($op1$$Register, $op2$$Register);
16572 %}
16573 ins_pipe(ialu_cr_reg_reg);
16574 %}
16575
16576 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16577 %{
16578 match(Set cr (CmpU op1 op2));
16579
16580 format %{ "cmpl $op1, $op2\t# unsigned" %}
16581 ins_encode %{
16582 __ cmpl($op1$$Register, $op2$$constant);
16583 %}
16584 ins_pipe(ialu_cr_reg_imm);
16585 %}
16586
16587 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16588 %{
16589 match(Set cr (CmpU op1 (LoadI op2)));
16590
16591 ins_cost(500); // XXX
16592 format %{ "cmpl $op1, $op2\t# unsigned" %}
16593 ins_encode %{
16594 __ cmpl($op1$$Register, $op2$$Address);
16595 %}
16596 ins_pipe(ialu_cr_reg_mem);
16597 %}
16598
16599 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16600 %{
16601 match(Set cr (CmpU src zero));
16602
16603 format %{ "testl $src, $src\t# unsigned" %}
16604 ins_encode %{
16605 __ testl($src$$Register, $src$$Register);
16606 %}
16607 ins_pipe(ialu_cr_reg_imm);
16608 %}
16609
16610 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16611 %{
16612 match(Set cr (CmpP op1 op2));
16613
16614 format %{ "cmpq $op1, $op2\t# ptr" %}
16615 ins_encode %{
16616 __ cmpq($op1$$Register, $op2$$Register);
16617 %}
16618 ins_pipe(ialu_cr_reg_reg);
16619 %}
16620
16621 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16622 %{
16623 match(Set cr (CmpP op1 (LoadP op2)));
16624 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16625
16626 ins_cost(500); // XXX
16627 format %{ "cmpq $op1, $op2\t# ptr" %}
16628 ins_encode %{
16629 __ cmpq($op1$$Register, $op2$$Address);
16630 %}
16631 ins_pipe(ialu_cr_reg_mem);
16632 %}
16633
16634 // XXX this is generalized by compP_rReg_mem???
16635 // Compare raw pointer (used in out-of-heap check).
16636 // Only works because non-oop pointers must be raw pointers
16637 // and raw pointers have no anti-dependencies.
16638 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16639 %{
16640 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16641 n->in(2)->as_Load()->barrier_data() == 0);
16642 match(Set cr (CmpP op1 (LoadP op2)));
16643
16644 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16645 ins_encode %{
16646 __ cmpq($op1$$Register, $op2$$Address);
16647 %}
16648 ins_pipe(ialu_cr_reg_mem);
16649 %}
16650
16651 // This will generate a signed flags result. This should be OK since
16652 // any compare to a zero should be eq/neq.
16653 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16654 %{
16655 match(Set cr (CmpP src zero));
16656
16657 format %{ "testq $src, $src\t# ptr" %}
16658 ins_encode %{
16659 __ testq($src$$Register, $src$$Register);
16660 %}
16661 ins_pipe(ialu_cr_reg_imm);
16662 %}
16663
16664 // This will generate a signed flags result. This should be OK since
16665 // any compare to a zero should be eq/neq.
16666 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16667 %{
16668 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16669 n->in(1)->as_Load()->barrier_data() == 0);
16670 match(Set cr (CmpP (LoadP op) zero));
16671
16672 ins_cost(500); // XXX
16673 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16674 ins_encode %{
16675 __ testq($op$$Address, 0xFFFFFFFF);
16676 %}
16677 ins_pipe(ialu_cr_reg_imm);
16678 %}
16679
16680 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16681 %{
16682 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16683 n->in(1)->as_Load()->barrier_data() == 0);
16684 match(Set cr (CmpP (LoadP mem) zero));
16685
16686 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16687 ins_encode %{
16688 __ cmpq(r12, $mem$$Address);
16689 %}
16690 ins_pipe(ialu_cr_reg_mem);
16691 %}
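// With a zero compressed-oops base, R12 (reserved as the heap-base
// register) holds 0, so comparing it against memory doubles as a null
// check without materializing an immediate.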
16692
16693 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16694 %{
16695 match(Set cr (CmpN op1 op2));
16696
16697 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16698 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16699 ins_pipe(ialu_cr_reg_reg);
16700 %}
16701
16702 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16703 %{
16704 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16705 match(Set cr (CmpN src (LoadN mem)));
16706
16707 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16708 ins_encode %{
16709 __ cmpl($src$$Register, $mem$$Address);
16710 %}
16711 ins_pipe(ialu_cr_reg_mem);
16712 %}
16713
16714 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16715 match(Set cr (CmpN op1 op2));
16716
16717 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16718 ins_encode %{
16719 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16720 %}
16721 ins_pipe(ialu_cr_reg_imm);
16722 %}
16723
16724 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16725 %{
16726 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16727 match(Set cr (CmpN src (LoadN mem)));
16728
16729 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16730 ins_encode %{
16731 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16732 %}
16733 ins_pipe(ialu_cr_reg_mem);
16734 %}
16735
16736 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16737 match(Set cr (CmpN op1 op2));
16738
16739 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16740 ins_encode %{
16741 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16742 %}
16743 ins_pipe(ialu_cr_reg_imm);
16744 %}
16745
16746 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16747 %{
16748 predicate(!UseCompactObjectHeaders);
16749 match(Set cr (CmpN src (LoadNKlass mem)));
16750
16751 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16752 ins_encode %{
16753 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16754 %}
16755 ins_pipe(ialu_cr_reg_mem);
16756 %}
16757
16758 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16759 match(Set cr (CmpN src zero));
16760
16761 format %{ "testl $src, $src\t# compressed ptr" %}
16762 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16763 ins_pipe(ialu_cr_reg_imm);
16764 %}
16765
16766 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16767 %{
16768 predicate(CompressedOops::base() != nullptr &&
16769 n->in(1)->as_Load()->barrier_data() == 0);
16770 match(Set cr (CmpN (LoadN mem) zero));
16771
16772 ins_cost(500); // XXX
16773 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16774 ins_encode %{
16775 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16776 %}
16777 ins_pipe(ialu_cr_reg_mem);
16778 %}
16779
16780 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16781 %{
16782 predicate(CompressedOops::base() == nullptr &&
16783 n->in(1)->as_Load()->barrier_data() == 0);
16784 match(Set cr (CmpN (LoadN mem) zero));
16785
16786 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16787 ins_encode %{
16788 __ cmpl(r12, $mem$$Address);
16789 %}
16790 ins_pipe(ialu_cr_reg_mem);
16791 %}
16792
16793 // Yanked all unsigned pointer compare operations.
16794 // Pointer compares are done with CmpP which is already unsigned.
16795
16796 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16797 %{
16798 match(Set cr (CmpL op1 op2));
16799
16800 format %{ "cmpq $op1, $op2" %}
16801 ins_encode %{
16802 __ cmpq($op1$$Register, $op2$$Register);
16803 %}
16804 ins_pipe(ialu_cr_reg_reg);
16805 %}
16806
16807 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16808 %{
16809 match(Set cr (CmpL op1 op2));
16810
16811 format %{ "cmpq $op1, $op2" %}
16812 ins_encode %{
16813 __ cmpq($op1$$Register, $op2$$constant);
16814 %}
16815 ins_pipe(ialu_cr_reg_imm);
16816 %}
16817
16818 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16819 %{
16820 match(Set cr (CmpL op1 (LoadL op2)));
16821
16822 format %{ "cmpq $op1, $op2" %}
16823 ins_encode %{
16824 __ cmpq($op1$$Register, $op2$$Address);
16825 %}
16826 ins_pipe(ialu_cr_reg_mem);
16827 %}
16828
16829 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16830 %{
16831 match(Set cr (CmpL src zero));
16832
16833 format %{ "testq $src, $src" %}
16834 ins_encode %{
16835 __ testq($src$$Register, $src$$Register);
16836 %}
16837 ins_pipe(ialu_cr_reg_imm);
16838 %}
16839
16840 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16841 %{
16842 match(Set cr (CmpL (AndL src con) zero));
16843
16844 format %{ "testq $src, $con\t# long" %}
16845 ins_encode %{
16846 __ testq($src$$Register, $con$$constant);
16847 %}
16848 ins_pipe(ialu_cr_reg_imm);
16849 %}
16850
16851 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16852 %{
16853 match(Set cr (CmpL (AndL src1 src2) zero));
16854
16855 format %{ "testq $src1, $src2\t# long" %}
16856 ins_encode %{
16857 __ testq($src1$$Register, $src2$$Register);
16858 %}
16859 ins_pipe(ialu_cr_reg_imm);
16860 %}
16861
16862 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16863 %{
16864 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16865
16866 format %{ "testq $src, $mem" %}
16867 ins_encode %{
16868 __ testq($src$$Register, $mem$$Address);
16869 %}
16870 ins_pipe(ialu_cr_reg_mem);
16871 %}
16872
16873 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16874 %{
16875 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16876
16877 format %{ "testq $src, $mem" %}
16878 ins_encode %{
16879 __ testq($src$$Register, $mem$$Address);
16880 %}
16881 ins_pipe(ialu_cr_reg_mem);
16882 %}
16883
16884 // Manifest a CmpU result in an integer register. Very painful.
16885 // This is the test to avoid.
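// The expansion yields the canonical -1/0/+1: dst starts at -1 and is kept
// when src1 < src2 (unsigned below); otherwise setcc(notZero) leaves 0 on
// equality and 1 when src1 > src2.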
16886 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16887 %{
16888 match(Set dst (CmpU3 src1 src2));
16889 effect(KILL flags);
16890
16891 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16897 ins_encode %{
16898 Label done;
16899 __ cmpl($src1$$Register, $src2$$Register);
16900 __ movl($dst$$Register, -1);
16901 __ jccb(Assembler::below, done);
16902 __ setcc(Assembler::notZero, $dst$$Register);
16903 __ bind(done);
16904 %}
16905 ins_pipe(pipe_slow);
16906 %}
16907
16908 // Manifest a CmpL result in an integer register. Very painful.
16909 // This is the test to avoid.
16910 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16911 %{
16912 match(Set dst (CmpL3 src1 src2));
16913 effect(KILL flags);
16914
16915 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16921 ins_encode %{
16922 Label done;
16923 __ cmpq($src1$$Register, $src2$$Register);
16924 __ movl($dst$$Register, -1);
16925 __ jccb(Assembler::less, done);
16926 __ setcc(Assembler::notZero, $dst$$Register);
16927 __ bind(done);
16928 %}
16929 ins_pipe(pipe_slow);
16930 %}
16931
16932 // Manifest a CmpUL result in an integer register. Very painful.
16933 // This is the test to avoid.
16934 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16935 %{
16936 match(Set dst (CmpUL3 src1 src2));
16937 effect(KILL flags);
16938
16939 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16945 ins_encode %{
16946 Label done;
16947 __ cmpq($src1$$Register, $src2$$Register);
16948 __ movl($dst$$Register, -1);
16949 __ jccb(Assembler::below, done);
16950 __ setcc(Assembler::notZero, $dst$$Register);
16951 __ bind(done);
16952 %}
16953 ins_pipe(pipe_slow);
16954 %}
16955
16956 // Unsigned long compare Instructions; really, same as signed long except they
16957 // produce an rFlagsRegU instead of rFlagsReg.
16958 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16959 %{
16960 match(Set cr (CmpUL op1 op2));
16961
16962 format %{ "cmpq $op1, $op2\t# unsigned" %}
16963 ins_encode %{
16964 __ cmpq($op1$$Register, $op2$$Register);
16965 %}
16966 ins_pipe(ialu_cr_reg_reg);
16967 %}
16968
16969 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16970 %{
16971 match(Set cr (CmpUL op1 op2));
16972
16973 format %{ "cmpq $op1, $op2\t# unsigned" %}
16974 ins_encode %{
16975 __ cmpq($op1$$Register, $op2$$constant);
16976 %}
16977 ins_pipe(ialu_cr_reg_imm);
16978 %}
16979
16980 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16981 %{
16982 match(Set cr (CmpUL op1 (LoadL op2)));
16983
16984 format %{ "cmpq $op1, $op2\t# unsigned" %}
16985 ins_encode %{
16986 __ cmpq($op1$$Register, $op2$$Address);
16987 %}
16988 ins_pipe(ialu_cr_reg_mem);
16989 %}
16990
16991 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16992 %{
16993 match(Set cr (CmpUL src zero));
16994
16995 format %{ "testq $src, $src\t# unsigned" %}
16996 ins_encode %{
16997 __ testq($src$$Register, $src$$Register);
16998 %}
16999 ins_pipe(ialu_cr_reg_imm);
17000 %}
17001
17002 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
17003 %{
17004 match(Set cr (CmpI (LoadB mem) imm));
17005
17006 ins_cost(125);
17007 format %{ "cmpb $mem, $imm" %}
17008 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
17009 ins_pipe(ialu_cr_reg_mem);
17010 %}
17011
17012 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
17013 %{
17014 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
17015
17016 ins_cost(125);
17017 format %{ "testb $mem, $imm\t# ubyte" %}
17018 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17019 ins_pipe(ialu_cr_reg_mem);
17020 %}
17021
17022 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
17023 %{
17024 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
17025
17026 ins_cost(125);
17027 format %{ "testb $mem, $imm\t# byte" %}
17028 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
17029 ins_pipe(ialu_cr_reg_mem);
17030 %}
17031
17032 //----------Max and Min--------------------------------------------------------
17033 // Min Instructions
17034
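// MinI/MaxI expand into a compare plus a conditional move, e.g. for min:
//
//   cmpl    dst, src
//   cmovlgt dst, src   # dst = (dst > src) ? src : dst
//
// keeping the smaller value in dst without a branch.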
17035 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
17036 %{
17037 predicate(!UseAPX);
17038 effect(USE_DEF dst, USE src, USE cr);
17039
17040 format %{ "cmovlgt $dst, $src\t# min" %}
17041 ins_encode %{
17042 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
17043 %}
17044 ins_pipe(pipe_cmov_reg);
17045 %}
17046
17047 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17048 %{
17049 predicate(UseAPX);
17050 effect(DEF dst, USE src1, USE src2, USE cr);
17051
17052 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
17053 ins_encode %{
17054 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
17055 %}
17056 ins_pipe(pipe_cmov_reg);
17057 %}
17058
17059 instruct minI_rReg(rRegI dst, rRegI src)
17060 %{
17061 predicate(!UseAPX);
17062 match(Set dst (MinI dst src));
17063
17064 ins_cost(200);
17065 expand %{
17066 rFlagsReg cr;
17067 compI_rReg(cr, dst, src);
17068 cmovI_reg_g(dst, src, cr);
17069 %}
17070 %}
17071
17072 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17073 %{
17074 predicate(UseAPX);
17075 match(Set dst (MinI src1 src2));
17076 effect(DEF dst, USE src1, USE src2);
17077 flag(PD::Flag_ndd_demotable_opr1);
17078
17079 ins_cost(200);
17080 expand %{
17081 rFlagsReg cr;
17082 compI_rReg(cr, src1, src2);
17083 cmovI_reg_g_ndd(dst, src1, src2, cr);
17084 %}
17085 %}
17086
17087 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
17088 %{
17089 predicate(!UseAPX);
17090 effect(USE_DEF dst, USE src, USE cr);
17091
17092 format %{ "cmovllt $dst, $src\t# max" %}
17093 ins_encode %{
17094 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
17095 %}
17096 ins_pipe(pipe_cmov_reg);
17097 %}
17098
17099 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
17100 %{
17101 predicate(UseAPX);
17102 effect(DEF dst, USE src1, USE src2, USE cr);
17103
17104 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
17105 ins_encode %{
17106 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
17107 %}
17108 ins_pipe(pipe_cmov_reg);
17109 %}
17110
17111 instruct maxI_rReg(rRegI dst, rRegI src)
17112 %{
17113 predicate(!UseAPX);
17114 match(Set dst (MaxI dst src));
17115
17116 ins_cost(200);
17117 expand %{
17118 rFlagsReg cr;
17119 compI_rReg(cr, dst, src);
17120 cmovI_reg_l(dst, src, cr);
17121 %}
17122 %}
17123
17124 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
17125 %{
17126 predicate(UseAPX);
17127 match(Set dst (MaxI src1 src2));
17128 effect(DEF dst, USE src1, USE src2);
17129 flag(PD::Flag_ndd_demotable_opr1);
17130
17131 ins_cost(200);
17132 expand %{
17133 rFlagsReg cr;
17134 compI_rReg(cr, src1, src2);
17135 cmovI_reg_l_ndd(dst, src1, src2, cr);
17136 %}
17137 %}
17138
17139 // ============================================================================
17140 // Branch Instructions
17141
17142 // Jump Direct - Label defines a relative address from JMP+1
17143 instruct jmpDir(label labl)
17144 %{
17145 match(Goto);
17146 effect(USE labl);
17147
17148 ins_cost(300);
17149 format %{ "jmp $labl" %}
17150 size(5);
17151 ins_encode %{
17152 Label* L = $labl$$label;
17153 __ jmp(*L, false); // Always long jump
17154 %}
17155 ins_pipe(pipe_jmp);
17156 %}
17157
17158 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17159 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
17160 %{
17161 match(If cop cr);
17162 effect(USE labl);
17163
17164 ins_cost(300);
17165 format %{ "j$cop $labl" %}
17166 size(6);
17167 ins_encode %{
17168 Label* L = $labl$$label;
17169 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17170 %}
17171 ins_pipe(pipe_jcc);
17172 %}
17173
17174 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17175 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
17176 %{
17177 match(CountedLoopEnd cop cr);
17178 effect(USE labl);
17179
17180 ins_cost(300);
17181 format %{ "j$cop $labl\t# loop end" %}
17182 size(6);
17183 ins_encode %{
17184 Label* L = $labl$$label;
17185 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17186 %}
17187 ins_pipe(pipe_jcc);
17188 %}
17189
17190 // Jump Direct Conditional - using unsigned comparison
17191 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17192 match(If cop cmp);
17193 effect(USE labl);
17194
17195 ins_cost(300);
17196 format %{ "j$cop,u $labl" %}
17197 size(6);
17198 ins_encode %{
17199 Label* L = $labl$$label;
17200 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17201 %}
17202 ins_pipe(pipe_jcc);
17203 %}
17204
17205 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17206 match(If cop cmp);
17207 effect(USE labl);
17208
17209 ins_cost(200);
17210 format %{ "j$cop,u $labl" %}
17211 size(6);
17212 ins_encode %{
17213 Label* L = $labl$$label;
17214 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17215 %}
17216 ins_pipe(pipe_jcc);
17217 %}
17218
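// After a ucomiss/ucomisd, PF is set iff the comparison was unordered (at
// least one operand was NaN). For != the branch must also be taken when the
// result is unordered, so jp shares the target; for == it must not be taken,
// so jp skips over the je.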
17219 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17220 match(If cop cmp);
17221 effect(USE labl);
17222
17223 ins_cost(200);
17224 format %{ $$template
17225 if ($cop$$cmpcode == Assembler::notEqual) {
17226 $$emit$$"jp,u $labl\n\t"
17227 $$emit$$"j$cop,u $labl"
17228 } else {
17229 $$emit$$"jp,u done\n\t"
17230 $$emit$$"j$cop,u $labl\n\t"
17231 $$emit$$"done:"
17232 }
17233 %}
17234 ins_encode %{
17235 Label* l = $labl$$label;
17236 if ($cop$$cmpcode == Assembler::notEqual) {
17237 __ jcc(Assembler::parity, *l, false);
17238 __ jcc(Assembler::notEqual, *l, false);
17239 } else if ($cop$$cmpcode == Assembler::equal) {
17240 Label done;
17241 __ jccb(Assembler::parity, done);
17242 __ jcc(Assembler::equal, *l, false);
17243 __ bind(done);
17244 } else {
17245 ShouldNotReachHere();
17246 }
17247 %}
17248 ins_pipe(pipe_jcc);
17249 %}
17250
17251 // Jump Direct Conditional - using signed and unsigned comparison
17252 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17253 match(If cop cmp);
17254 effect(USE labl);
17255
17256 ins_cost(200);
17257 format %{ "j$cop,su $labl" %}
17258 size(6);
17259 ins_encode %{
17260 Label* L = $labl$$label;
17261 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
17262 %}
17263 ins_pipe(pipe_jcc);
17264 %}
17265
17266 // ============================================================================
// The second, slow half of a subtype check. Scan the subklass's secondary
// superklass array for an instance of the superklass. Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
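//
// A sketch of the scan in C-like pseudocode (names approximate):
//   Array<Klass*>* ss = sub->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super) {
//       sub->set_secondary_super_cache(super); // hit: cache it, return 0
//       return 0;
//     }
//   }
//   return nonzero; // miss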
17272
17273 instruct partialSubtypeCheck(rdi_RegP result,
17274 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
17275 rFlagsReg cr)
17276 %{
17277 match(Set result (PartialSubtypeCheck sub super));
17278 predicate(!UseSecondarySupersTable);
17279 effect(KILL rcx, KILL cr);
17280
17281 ins_cost(1100); // slightly larger than the next version
17282 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
17283 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
17284 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
17285 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
17286 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
17287 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
17288 "xorq $result, $result\t\t Hit: rdi zero\n\t"
17289 "miss:\t" %}
17290
17291 ins_encode %{
17292 Label miss;
17293 // NB: Callers may assume that, when $result is a valid register,
17294 // check_klass_subtype_slow_path_linear sets it to a nonzero
17295 // value.
17296 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17297 $rcx$$Register, $result$$Register,
17298 nullptr, &miss,
17299 /*set_cond_codes:*/ true);
17300 __ xorptr($result$$Register, $result$$Register);
17301 __ bind(miss);
17302 %}
17303
17304 ins_pipe(pipe_slow);
17305 %}
17306
17307 // ============================================================================
17308 // Two versions of hashtable-based partialSubtypeCheck, both used when
17309 // we need to search for a super class in the secondary supers array.
17310 // The first is used when we don't know _a priori_ the class being
17311 // searched for. The second, far more common, is used when we do know:
17312 // this is used for instanceof, checkcast, and any case where C2 can
17313 // determine it by constant propagation.
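//
// For example, `x instanceof String` gives C2 a constant superklass, so the
// hash slot can be computed at compile time and the const form below is
// selected; a Class.isInstance() call on a non-constant Class must use the
// var form.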
17314
17315 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17316 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17317 rFlagsReg cr)
17318 %{
17319 match(Set result (PartialSubtypeCheck sub super));
17320 predicate(UseSecondarySupersTable);
17321 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17322
17323 ins_cost(1000);
17324 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17325
17326 ins_encode %{
17327 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17328 $temp3$$Register, $temp4$$Register, $result$$Register);
17329 %}
17330
17331 ins_pipe(pipe_slow);
17332 %}
17333
17334 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17335 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17336 rFlagsReg cr)
17337 %{
17338 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17339 predicate(UseSecondarySupersTable);
17340 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17341
17342 ins_cost(700); // smaller than the next version
17343 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17344
17345 ins_encode %{
17346 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17347 if (InlineSecondarySupersTest) {
17348 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17349 $temp3$$Register, $temp4$$Register, $result$$Register,
17350 super_klass_slot);
17351 } else {
17352 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17353 }
17354 %}
17355
17356 ins_pipe(pipe_slow);
17357 %}
17358
17359 // ============================================================================
17360 // Branch Instructions -- short offset versions
17361 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether a
// branch target is close enough for the short form via the
// is_short_branch_offset() predicate in the machine-specific code section of
// the file.
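//
// For reference, the size() attributes reflect the x86 encodings:
//   long:  jmp rel32 = E9 cd (5 bytes),  jcc rel32 = 0F 8x cd (6 bytes)
//   short: jmp rel8  = EB cb (2 bytes),  jcc rel8  = 7x cb    (2 bytes)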
17370
17371 // Jump Direct - Label defines a relative address from JMP+1
17372 instruct jmpDir_short(label labl) %{
17373 match(Goto);
17374 effect(USE labl);
17375
17376 ins_cost(300);
17377 format %{ "jmp,s $labl" %}
17378 size(2);
17379 ins_encode %{
17380 Label* L = $labl$$label;
17381 __ jmpb(*L);
17382 %}
17383 ins_pipe(pipe_jmp);
17384 ins_short_branch(1);
17385 %}
17386
17387 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17388 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17389 match(If cop cr);
17390 effect(USE labl);
17391
17392 ins_cost(300);
17393 format %{ "j$cop,s $labl" %}
17394 size(2);
17395 ins_encode %{
17396 Label* L = $labl$$label;
17397 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17398 %}
17399 ins_pipe(pipe_jcc);
17400 ins_short_branch(1);
17401 %}
17402
17403 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17404 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17405 match(CountedLoopEnd cop cr);
17406 effect(USE labl);
17407
17408 ins_cost(300);
17409 format %{ "j$cop,s $labl\t# loop end" %}
17410 size(2);
17411 ins_encode %{
17412 Label* L = $labl$$label;
17413 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17414 %}
17415 ins_pipe(pipe_jcc);
17416 ins_short_branch(1);
17417 %}
17418
17419 // Jump Direct Conditional - using unsigned comparison
17420 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17421 match(If cop cmp);
17422 effect(USE labl);
17423
17424 ins_cost(300);
17425 format %{ "j$cop,us $labl" %}
17426 size(2);
17427 ins_encode %{
17428 Label* L = $labl$$label;
17429 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17430 %}
17431 ins_pipe(pipe_jcc);
17432 ins_short_branch(1);
17433 %}
17434
17435 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17436 match(If cop cmp);
17437 effect(USE labl);
17438
17439 ins_cost(300);
17440 format %{ "j$cop,us $labl" %}
17441 size(2);
17442 ins_encode %{
17443 Label* L = $labl$$label;
17444 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17445 %}
17446 ins_pipe(pipe_jcc);
17447 ins_short_branch(1);
17448 %}
17449
17450 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17451 match(If cop cmp);
17452 effect(USE labl);
17453
17454 ins_cost(300);
17455 format %{ $$template
17456 if ($cop$$cmpcode == Assembler::notEqual) {
17457 $$emit$$"jp,u,s $labl\n\t"
17458 $$emit$$"j$cop,u,s $labl"
17459 } else {
17460 $$emit$$"jp,u,s done\n\t"
17461 $$emit$$"j$cop,u,s $labl\n\t"
17462 $$emit$$"done:"
17463 }
17464 %}
17465 size(4);
17466 ins_encode %{
17467 Label* l = $labl$$label;
17468 if ($cop$$cmpcode == Assembler::notEqual) {
17469 __ jccb(Assembler::parity, *l);
17470 __ jccb(Assembler::notEqual, *l);
17471 } else if ($cop$$cmpcode == Assembler::equal) {
17472 Label done;
17473 __ jccb(Assembler::parity, done);
17474 __ jccb(Assembler::equal, *l);
17475 __ bind(done);
17476 } else {
17477 ShouldNotReachHere();
17478 }
17479 %}
17480 ins_pipe(pipe_jcc);
17481 ins_short_branch(1);
17482 %}
17483
17484 // Jump Direct Conditional - using signed and unsigned comparison
17485 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17486 match(If cop cmp);
17487 effect(USE labl);
17488
17489 ins_cost(300);
17490 format %{ "j$cop,sus $labl" %}
17491 size(2);
17492 ins_encode %{
17493 Label* L = $labl$$label;
17494 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17495 %}
17496 ins_pipe(pipe_jcc);
17497 ins_short_branch(1);
17498 %}
17499
17500 // ============================================================================
17501 // inlined locking and unlocking
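// Both fast paths report their outcome through the condition codes: ZF is
// set when the inline path succeeded and clear when the code must fall back
// to the runtime slow path.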
17502
17503 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17504 match(Set cr (FastLock object box));
17505 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17506 ins_cost(300);
17507 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17508 ins_encode %{
17509 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17510 %}
17511 ins_pipe(pipe_slow);
17512 %}
17513
17514 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17515 match(Set cr (FastUnlock object rax_reg));
17516 effect(TEMP tmp, USE_KILL rax_reg);
17517 ins_cost(300);
17518 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17519 ins_encode %{
17520 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17521 %}
17522 ins_pipe(pipe_slow);
17523 %}
17524
17525
17526 // ============================================================================
17527 // Safepoint Instructions
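// The poll below reads a word from the thread-local polling page. When a
// safepoint is pending the VM arms that page, the read faults, and the
// signal handler recognizes the faulting PC as a safepoint poll (hence the
// relocation and the is_safepoint_poll() assert in the encoding).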
17528 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17529 %{
17530 match(SafePoint poll);
17531 effect(KILL cr, USE poll);
17532
17533 format %{ "testl rax, [$poll]\t"
17534 "# Safepoint: poll for GC" %}
17535 ins_cost(125);
17536 ins_encode %{
17537 __ relocate(relocInfo::poll_type);
17538 address pre_pc = __ pc();
17539 __ testl(rax, Address($poll$$Register, 0));
17540 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17541 %}
17542 ins_pipe(ialu_reg_mem);
17543 %}
17544
17545 instruct mask_all_evexL(kReg dst, rRegL src) %{
17546 match(Set dst (MaskAll src));
17547 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17548 ins_encode %{
17549 int mask_len = Matcher::vector_length(this);
17550 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17551 %}
17552 ins_pipe( pipe_slow );
17553 %}
17554
17555 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17556 predicate(Matcher::vector_length(n) > 32);
17557 match(Set dst (MaskAll src));
17558 effect(TEMP tmp);
17559 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17560 ins_encode %{
17561 int mask_len = Matcher::vector_length(this);
17562 __ movslq($tmp$$Register, $src$$Register);
17563 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17564 %}
17565 ins_pipe( pipe_slow );
17566 %}
17567
17568 // ============================================================================
17569 // Procedure Call/Return Instructions
17570 // Call Java Static Instruction
17571 // Note: If this code changes, the corresponding ret_addr_offset() and
17572 // compute_padding() functions will have to be adjusted.
17573 instruct CallStaticJavaDirect(method meth) %{
17574 match(CallStaticJava);
17575 effect(USE meth);
17576
17577 ins_cost(300);
17578 format %{ "call,static " %}
17579 opcode(0xE8); /* E8 cd */
17580 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17581 ins_pipe(pipe_slow);
17582 ins_alignment(4);
17583 %}
17584
17585 // Call Java Dynamic Instruction
17586 // Note: If this code changes, the corresponding ret_addr_offset() and
17587 // compute_padding() functions will have to be adjusted.
17588 instruct CallDynamicJavaDirect(method meth)
17589 %{
17590 match(CallDynamicJava);
17591 effect(USE meth);
17592
17593 ins_cost(300);
17594 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17595 "call,dynamic " %}
17596 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17597 ins_pipe(pipe_slow);
17598 ins_alignment(4);
17599 %}
17600
17601 // Call Runtime Instruction
17602 instruct CallRuntimeDirect(method meth)
17603 %{
17604 match(CallRuntime);
17605 effect(USE meth);
17606
17607 ins_cost(300);
17608 format %{ "call,runtime " %}
17609 ins_encode(clear_avx, Java_To_Runtime(meth));
17610 ins_pipe(pipe_slow);
17611 %}
17612
17613 // Call runtime without safepoint
17614 instruct CallLeafDirect(method meth)
17615 %{
17616 match(CallLeaf);
17617 effect(USE meth);
17618
17619 ins_cost(300);
17620 format %{ "call_leaf,runtime " %}
17621 ins_encode(clear_avx, Java_To_Runtime(meth));
17622 ins_pipe(pipe_slow);
17623 %}
17624
17625 // Call runtime without safepoint and with vector arguments
17626 instruct CallLeafDirectVector(method meth)
17627 %{
17628 match(CallLeafVector);
17629 effect(USE meth);
17630
17631 ins_cost(300);
17632 format %{ "call_leaf,vector " %}
17633 ins_encode(Java_To_Runtime(meth));
17634 ins_pipe(pipe_slow);
17635 %}
17636
17637 // Call runtime without safepoint
17638 // entry point is null, target holds the address to call
17639 instruct CallLeafNoFPInDirect(rRegP target)
17640 %{
17641 predicate(n->as_Call()->entry_point() == nullptr);
17642 match(CallLeafNoFP target);
17643
17644 ins_cost(300);
17645 format %{ "call_leaf_nofp,runtime indirect " %}
17646 ins_encode %{
17647 __ call($target$$Register);
17648 %}
17649
17650 ins_pipe(pipe_slow);
17651 %}
17652
17653 // Call runtime without safepoint
17654 instruct CallLeafNoFPDirect(method meth)
17655 %{
17656 predicate(n->as_Call()->entry_point() != nullptr);
17657 match(CallLeafNoFP);
17658 effect(USE meth);
17659
17660 ins_cost(300);
17661 format %{ "call_leaf_nofp,runtime " %}
17662 ins_encode(clear_avx, Java_To_Runtime(meth));
17663 ins_pipe(pipe_slow);
17664 %}
17665
17666 // Return Instruction
17667 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17670 instruct Ret()
17671 %{
17672 match(Return);
17673
17674 format %{ "ret" %}
17675 ins_encode %{
17676 __ ret(0);
17677 %}
17678 ins_pipe(pipe_jmp);
17679 %}
17680
17681 // Tail Call; Jump from runtime stub to Java code.
17682 // Also known as an 'interprocedural jump'.
17683 // Target of jump will eventually return to caller.
17684 // TailJump below removes the return address.
17685 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17686 // emitted just above the TailCall which has reset rbp to the caller state.
17687 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17688 %{
17689 match(TailCall jump_target method_ptr);
17690
17691 ins_cost(300);
17692 format %{ "jmp $jump_target\t# rbx holds method" %}
17693 ins_encode %{
17694 __ jmp($jump_target$$Register);
17695 %}
17696 ins_pipe(pipe_jmp);
17697 %}
17698
17699 // Tail Jump; remove the return address; jump to target.
17700 // TailCall above leaves the return address around.
17701 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17702 %{
17703 match(TailJump jump_target ex_oop);
17704
17705 ins_cost(300);
17706 format %{ "popq rdx\t# pop return address\n\t"
17707 "jmp $jump_target" %}
17708 ins_encode %{
17709 __ popq(as_Register(RDX_enc));
17710 __ jmp($jump_target$$Register);
17711 %}
17712 ins_pipe(pipe_jmp);
17713 %}
17714
17715 // Forward exception.
17716 instruct ForwardExceptionjmp()
17717 %{
17718 match(ForwardException);
17719
17720 format %{ "jmp forward_exception_stub" %}
17721 ins_encode %{
17722 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17723 %}
17724 ins_pipe(pipe_jmp);
17725 %}
17726
// Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up
// just prior to jumping to this handler. No code emitted.
17730 instruct CreateException(rax_RegP ex_oop)
17731 %{
17732 match(Set ex_oop (CreateEx));
17733
17734 size(0);
17735 // use the following format syntax
17736 format %{ "# exception oop is in rax; no code emitted" %}
17737 ins_encode();
17738 ins_pipe(empty);
17739 %}
17740
17741 // Rethrow exception:
17742 // The exception oop will come in the first argument position.
17743 // Then JUMP (not call) to the rethrow stub code.
17744 instruct RethrowException()
17745 %{
17746 match(Rethrow);
17747
17748 // use the following format syntax
17749 format %{ "jmp rethrow_stub" %}
17750 ins_encode %{
17751 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17752 %}
17753 ins_pipe(pipe_jmp);
17754 %}
17755
17756 // ============================================================================
17757 // This name is KNOWN by the ADLC and cannot be changed.
17758 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17759 // for this guy.
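//
// R15 is permanently reserved as the current-thread register on x86_64, so
// ThreadLocal costs no code at all: consumers simply read $dst (== r15).
// See the r15_thread uses in the fast_lock/fast_unlock encodings above.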
17760 instruct tlsLoadP(r15_RegP dst) %{
17761 match(Set dst (ThreadLocal));
17762 effect(DEF dst);
17763
17764 size(0);
17765 format %{ "# TLS is in R15" %}
17766 ins_encode( /*empty encoding*/ );
17767 ins_pipe(ialu_reg_reg);
17768 %}
17769
17770 instruct addF_reg(regF dst, regF src) %{
17771 predicate(UseAVX == 0);
17772 match(Set dst (AddF dst src));
17773
17774 format %{ "addss $dst, $src" %}
17775 ins_cost(150);
17776 ins_encode %{
17777 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17778 %}
17779 ins_pipe(pipe_slow);
17780 %}
17781
17782 instruct addF_mem(regF dst, memory src) %{
17783 predicate(UseAVX == 0);
17784 match(Set dst (AddF dst (LoadF src)));
17785
17786 format %{ "addss $dst, $src" %}
17787 ins_cost(150);
17788 ins_encode %{
17789 __ addss($dst$$XMMRegister, $src$$Address);
17790 %}
17791 ins_pipe(pipe_slow);
17792 %}
17793
17794 instruct addF_imm(regF dst, immF con) %{
17795 predicate(UseAVX == 0);
17796 match(Set dst (AddF dst con));
17797 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17798 ins_cost(150);
17799 ins_encode %{
17800 __ addss($dst$$XMMRegister, $constantaddress($con));
17801 %}
17802 ins_pipe(pipe_slow);
17803 %}
17804
17805 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17806 predicate(UseAVX > 0);
17807 match(Set dst (AddF src1 src2));
17808
17809 format %{ "vaddss $dst, $src1, $src2" %}
17810 ins_cost(150);
17811 ins_encode %{
17812 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17813 %}
17814 ins_pipe(pipe_slow);
17815 %}
17816
17817 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17818 predicate(UseAVX > 0);
17819 match(Set dst (AddF src1 (LoadF src2)));
17820
17821 format %{ "vaddss $dst, $src1, $src2" %}
17822 ins_cost(150);
17823 ins_encode %{
17824 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17825 %}
17826 ins_pipe(pipe_slow);
17827 %}
17828
17829 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17830 predicate(UseAVX > 0);
17831 match(Set dst (AddF src con));
17832
17833 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17834 ins_cost(150);
17835 ins_encode %{
17836 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17837 %}
17838 ins_pipe(pipe_slow);
17839 %}
17840
17841 instruct addD_reg(regD dst, regD src) %{
17842 predicate(UseAVX == 0);
17843 match(Set dst (AddD dst src));
17844
17845 format %{ "addsd $dst, $src" %}
17846 ins_cost(150);
17847 ins_encode %{
17848 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17849 %}
17850 ins_pipe(pipe_slow);
17851 %}
17852
17853 instruct addD_mem(regD dst, memory src) %{
17854 predicate(UseAVX == 0);
17855 match(Set dst (AddD dst (LoadD src)));
17856
17857 format %{ "addsd $dst, $src" %}
17858 ins_cost(150);
17859 ins_encode %{
17860 __ addsd($dst$$XMMRegister, $src$$Address);
17861 %}
17862 ins_pipe(pipe_slow);
17863 %}
17864
17865 instruct addD_imm(regD dst, immD con) %{
17866 predicate(UseAVX == 0);
17867 match(Set dst (AddD dst con));
17868 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17869 ins_cost(150);
17870 ins_encode %{
17871 __ addsd($dst$$XMMRegister, $constantaddress($con));
17872 %}
17873 ins_pipe(pipe_slow);
17874 %}
17875
17876 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17877 predicate(UseAVX > 0);
17878 match(Set dst (AddD src1 src2));
17879
17880 format %{ "vaddsd $dst, $src1, $src2" %}
17881 ins_cost(150);
17882 ins_encode %{
17883 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17884 %}
17885 ins_pipe(pipe_slow);
17886 %}
17887
17888 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17889 predicate(UseAVX > 0);
17890 match(Set dst (AddD src1 (LoadD src2)));
17891
17892 format %{ "vaddsd $dst, $src1, $src2" %}
17893 ins_cost(150);
17894 ins_encode %{
17895 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17896 %}
17897 ins_pipe(pipe_slow);
17898 %}
17899
17900 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17901 predicate(UseAVX > 0);
17902 match(Set dst (AddD src con));
17903
17904 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17905 ins_cost(150);
17906 ins_encode %{
17907 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17908 %}
17909 ins_pipe(pipe_slow);
17910 %}
17911
17912 instruct subF_reg(regF dst, regF src) %{
17913 predicate(UseAVX == 0);
17914 match(Set dst (SubF dst src));
17915
17916 format %{ "subss $dst, $src" %}
17917 ins_cost(150);
17918 ins_encode %{
17919 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17920 %}
17921 ins_pipe(pipe_slow);
17922 %}
17923
17924 instruct subF_mem(regF dst, memory src) %{
17925 predicate(UseAVX == 0);
17926 match(Set dst (SubF dst (LoadF src)));
17927
17928 format %{ "subss $dst, $src" %}
17929 ins_cost(150);
17930 ins_encode %{
17931 __ subss($dst$$XMMRegister, $src$$Address);
17932 %}
17933 ins_pipe(pipe_slow);
17934 %}
17935
17936 instruct subF_imm(regF dst, immF con) %{
17937 predicate(UseAVX == 0);
17938 match(Set dst (SubF dst con));
17939 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17940 ins_cost(150);
17941 ins_encode %{
17942 __ subss($dst$$XMMRegister, $constantaddress($con));
17943 %}
17944 ins_pipe(pipe_slow);
17945 %}
17946
17947 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17948 predicate(UseAVX > 0);
17949 match(Set dst (SubF src1 src2));
17950
17951 format %{ "vsubss $dst, $src1, $src2" %}
17952 ins_cost(150);
17953 ins_encode %{
17954 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17955 %}
17956 ins_pipe(pipe_slow);
17957 %}
17958
17959 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17960 predicate(UseAVX > 0);
17961 match(Set dst (SubF src1 (LoadF src2)));
17962
17963 format %{ "vsubss $dst, $src1, $src2" %}
17964 ins_cost(150);
17965 ins_encode %{
17966 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17967 %}
17968 ins_pipe(pipe_slow);
17969 %}
17970
17971 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17972 predicate(UseAVX > 0);
17973 match(Set dst (SubF src con));
17974
17975 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17976 ins_cost(150);
17977 ins_encode %{
17978 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17979 %}
17980 ins_pipe(pipe_slow);
17981 %}
17982
17983 instruct subD_reg(regD dst, regD src) %{
17984 predicate(UseAVX == 0);
17985 match(Set dst (SubD dst src));
17986
17987 format %{ "subsd $dst, $src" %}
17988 ins_cost(150);
17989 ins_encode %{
17990 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17991 %}
17992 ins_pipe(pipe_slow);
17993 %}
17994
17995 instruct subD_mem(regD dst, memory src) %{
17996 predicate(UseAVX == 0);
17997 match(Set dst (SubD dst (LoadD src)));
17998
17999 format %{ "subsd $dst, $src" %}
18000 ins_cost(150);
18001 ins_encode %{
18002 __ subsd($dst$$XMMRegister, $src$$Address);
18003 %}
18004 ins_pipe(pipe_slow);
18005 %}
18006
18007 instruct subD_imm(regD dst, immD con) %{
18008 predicate(UseAVX == 0);
18009 match(Set dst (SubD dst con));
18010 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18011 ins_cost(150);
18012 ins_encode %{
18013 __ subsd($dst$$XMMRegister, $constantaddress($con));
18014 %}
18015 ins_pipe(pipe_slow);
18016 %}
18017
18018 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
18019 predicate(UseAVX > 0);
18020 match(Set dst (SubD src1 src2));
18021
18022 format %{ "vsubsd $dst, $src1, $src2" %}
18023 ins_cost(150);
18024 ins_encode %{
18025 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18026 %}
18027 ins_pipe(pipe_slow);
18028 %}
18029
18030 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
18031 predicate(UseAVX > 0);
18032 match(Set dst (SubD src1 (LoadD src2)));
18033
18034 format %{ "vsubsd $dst, $src1, $src2" %}
18035 ins_cost(150);
18036 ins_encode %{
18037 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18038 %}
18039 ins_pipe(pipe_slow);
18040 %}
18041
18042 instruct subD_reg_imm(regD dst, regD src, immD con) %{
18043 predicate(UseAVX > 0);
18044 match(Set dst (SubD src con));
18045
18046 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18047 ins_cost(150);
18048 ins_encode %{
18049 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18050 %}
18051 ins_pipe(pipe_slow);
18052 %}
18053
18054 instruct mulF_reg(regF dst, regF src) %{
18055 predicate(UseAVX == 0);
18056 match(Set dst (MulF dst src));
18057
18058 format %{ "mulss $dst, $src" %}
18059 ins_cost(150);
18060 ins_encode %{
18061 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
18062 %}
18063 ins_pipe(pipe_slow);
18064 %}
18065
18066 instruct mulF_mem(regF dst, memory src) %{
18067 predicate(UseAVX == 0);
18068 match(Set dst (MulF dst (LoadF src)));
18069
18070 format %{ "mulss $dst, $src" %}
18071 ins_cost(150);
18072 ins_encode %{
18073 __ mulss($dst$$XMMRegister, $src$$Address);
18074 %}
18075 ins_pipe(pipe_slow);
18076 %}
18077
18078 instruct mulF_imm(regF dst, immF con) %{
18079 predicate(UseAVX == 0);
18080 match(Set dst (MulF dst con));
18081 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18082 ins_cost(150);
18083 ins_encode %{
18084 __ mulss($dst$$XMMRegister, $constantaddress($con));
18085 %}
18086 ins_pipe(pipe_slow);
18087 %}
18088
18089 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
18090 predicate(UseAVX > 0);
18091 match(Set dst (MulF src1 src2));
18092
18093 format %{ "vmulss $dst, $src1, $src2" %}
18094 ins_cost(150);
18095 ins_encode %{
18096 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18097 %}
18098 ins_pipe(pipe_slow);
18099 %}
18100
18101 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
18102 predicate(UseAVX > 0);
18103 match(Set dst (MulF src1 (LoadF src2)));
18104
18105 format %{ "vmulss $dst, $src1, $src2" %}
18106 ins_cost(150);
18107 ins_encode %{
18108 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18109 %}
18110 ins_pipe(pipe_slow);
18111 %}
18112
18113 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
18114 predicate(UseAVX > 0);
18115 match(Set dst (MulF src con));
18116
18117 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18118 ins_cost(150);
18119 ins_encode %{
18120 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18121 %}
18122 ins_pipe(pipe_slow);
18123 %}
18124
18125 instruct mulD_reg(regD dst, regD src) %{
18126 predicate(UseAVX == 0);
18127 match(Set dst (MulD dst src));
18128
18129 format %{ "mulsd $dst, $src" %}
18130 ins_cost(150);
18131 ins_encode %{
18132 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
18133 %}
18134 ins_pipe(pipe_slow);
18135 %}
18136
18137 instruct mulD_mem(regD dst, memory src) %{
18138 predicate(UseAVX == 0);
18139 match(Set dst (MulD dst (LoadD src)));
18140
18141 format %{ "mulsd $dst, $src" %}
18142 ins_cost(150);
18143 ins_encode %{
18144 __ mulsd($dst$$XMMRegister, $src$$Address);
18145 %}
18146 ins_pipe(pipe_slow);
18147 %}
18148
18149 instruct mulD_imm(regD dst, immD con) %{
18150 predicate(UseAVX == 0);
18151 match(Set dst (MulD dst con));
18152 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18153 ins_cost(150);
18154 ins_encode %{
18155 __ mulsd($dst$$XMMRegister, $constantaddress($con));
18156 %}
18157 ins_pipe(pipe_slow);
18158 %}
18159
18160 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
18161 predicate(UseAVX > 0);
18162 match(Set dst (MulD src1 src2));
18163
18164 format %{ "vmulsd $dst, $src1, $src2" %}
18165 ins_cost(150);
18166 ins_encode %{
18167 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18168 %}
18169 ins_pipe(pipe_slow);
18170 %}
18171
18172 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
18173 predicate(UseAVX > 0);
18174 match(Set dst (MulD src1 (LoadD src2)));
18175
18176 format %{ "vmulsd $dst, $src1, $src2" %}
18177 ins_cost(150);
18178 ins_encode %{
18179 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18180 %}
18181 ins_pipe(pipe_slow);
18182 %}
18183
18184 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
18185 predicate(UseAVX > 0);
18186 match(Set dst (MulD src con));
18187
18188 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18189 ins_cost(150);
18190 ins_encode %{
18191 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18192 %}
18193 ins_pipe(pipe_slow);
18194 %}
18195
18196 instruct divF_reg(regF dst, regF src) %{
18197 predicate(UseAVX == 0);
18198 match(Set dst (DivF dst src));
18199
18200 format %{ "divss $dst, $src" %}
18201 ins_cost(150);
18202 ins_encode %{
18203 __ divss($dst$$XMMRegister, $src$$XMMRegister);
18204 %}
18205 ins_pipe(pipe_slow);
18206 %}
18207
18208 instruct divF_mem(regF dst, memory src) %{
18209 predicate(UseAVX == 0);
18210 match(Set dst (DivF dst (LoadF src)));
18211
18212 format %{ "divss $dst, $src" %}
18213 ins_cost(150);
18214 ins_encode %{
18215 __ divss($dst$$XMMRegister, $src$$Address);
18216 %}
18217 ins_pipe(pipe_slow);
18218 %}
18219
18220 instruct divF_imm(regF dst, immF con) %{
18221 predicate(UseAVX == 0);
18222 match(Set dst (DivF dst con));
18223 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
18224 ins_cost(150);
18225 ins_encode %{
18226 __ divss($dst$$XMMRegister, $constantaddress($con));
18227 %}
18228 ins_pipe(pipe_slow);
18229 %}
18230
18231 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
18232 predicate(UseAVX > 0);
18233 match(Set dst (DivF src1 src2));
18234
18235 format %{ "vdivss $dst, $src1, $src2" %}
18236 ins_cost(150);
18237 ins_encode %{
18238 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18239 %}
18240 ins_pipe(pipe_slow);
18241 %}
18242
18243 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
18244 predicate(UseAVX > 0);
18245 match(Set dst (DivF src1 (LoadF src2)));
18246
18247 format %{ "vdivss $dst, $src1, $src2" %}
18248 ins_cost(150);
18249 ins_encode %{
18250 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18251 %}
18252 ins_pipe(pipe_slow);
18253 %}
18254
18255 instruct divF_reg_imm(regF dst, regF src, immF con) %{
18256 predicate(UseAVX > 0);
18257 match(Set dst (DivF src con));
18258
18259 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
18260 ins_cost(150);
18261 ins_encode %{
18262 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18263 %}
18264 ins_pipe(pipe_slow);
18265 %}
18266
18267 instruct divD_reg(regD dst, regD src) %{
18268 predicate(UseAVX == 0);
18269 match(Set dst (DivD dst src));
18270
18271 format %{ "divsd $dst, $src" %}
18272 ins_cost(150);
18273 ins_encode %{
18274 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
18275 %}
18276 ins_pipe(pipe_slow);
18277 %}
18278
18279 instruct divD_mem(regD dst, memory src) %{
18280 predicate(UseAVX == 0);
18281 match(Set dst (DivD dst (LoadD src)));
18282
18283 format %{ "divsd $dst, $src" %}
18284 ins_cost(150);
18285 ins_encode %{
18286 __ divsd($dst$$XMMRegister, $src$$Address);
18287 %}
18288 ins_pipe(pipe_slow);
18289 %}
18290
18291 instruct divD_imm(regD dst, immD con) %{
18292 predicate(UseAVX == 0);
18293 match(Set dst (DivD dst con));
18294 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
18295 ins_cost(150);
18296 ins_encode %{
18297 __ divsd($dst$$XMMRegister, $constantaddress($con));
18298 %}
18299 ins_pipe(pipe_slow);
18300 %}
18301
18302 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
18303 predicate(UseAVX > 0);
18304 match(Set dst (DivD src1 src2));
18305
18306 format %{ "vdivsd $dst, $src1, $src2" %}
18307 ins_cost(150);
18308 ins_encode %{
18309 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
18310 %}
18311 ins_pipe(pipe_slow);
18312 %}
18313
18314 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18315 predicate(UseAVX > 0);
18316 match(Set dst (DivD src1 (LoadD src2)));
18317
18318 format %{ "vdivsd $dst, $src1, $src2" %}
18319 ins_cost(150);
18320 ins_encode %{
18321 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18322 %}
18323 ins_pipe(pipe_slow);
18324 %}
18325
18326 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18327 predicate(UseAVX > 0);
18328 match(Set dst (DivD src con));
18329
18330 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18331 ins_cost(150);
18332 ins_encode %{
18333 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18334 %}
18335 ins_pipe(pipe_slow);
18336 %}
18337
18338 instruct absF_reg(regF dst) %{
18339 predicate(UseAVX == 0);
18340 match(Set dst (AbsF dst));
18341 ins_cost(150);
18342 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18343 ins_encode %{
18344 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18345 %}
18346 ins_pipe(pipe_slow);
18347 %}
18348
18349 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18350 predicate(UseAVX > 0);
18351 match(Set dst (AbsF src));
18352 ins_cost(150);
18353 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18354 ins_encode %{
18355 int vlen_enc = Assembler::AVX_128bit;
18356 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18357 ExternalAddress(float_signmask()), vlen_enc);
18358 %}
18359 ins_pipe(pipe_slow);
18360 %}
18361
18362 instruct absD_reg(regD dst) %{
18363 predicate(UseAVX == 0);
18364 match(Set dst (AbsD dst));
18365 ins_cost(150);
18366 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18367 "# abs double by sign masking" %}
18368 ins_encode %{
18369 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18370 %}
18371 ins_pipe(pipe_slow);
18372 %}
18373
18374 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18375 predicate(UseAVX > 0);
18376 match(Set dst (AbsD src));
18377 ins_cost(150);
18378 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18379 "# abs double by sign masking" %}
18380 ins_encode %{
18381 int vlen_enc = Assembler::AVX_128bit;
18382 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18383 ExternalAddress(double_signmask()), vlen_enc);
18384 %}
18385 ins_pipe(pipe_slow);
18386 %}
18387
18388 instruct negF_reg(regF dst) %{
18389 predicate(UseAVX == 0);
18390 match(Set dst (NegF dst));
18391 ins_cost(150);
18392 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18393 ins_encode %{
18394 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18395 %}
18396 ins_pipe(pipe_slow);
18397 %}
18398
18399 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18400 predicate(UseAVX > 0);
18401 match(Set dst (NegF src));
18402 ins_cost(150);
18403 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18404 ins_encode %{
18405 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18406 ExternalAddress(float_signflip()));
18407 %}
18408 ins_pipe(pipe_slow);
18409 %}
18410
18411 instruct negD_reg(regD dst) %{
18412 predicate(UseAVX == 0);
18413 match(Set dst (NegD dst));
18414 ins_cost(150);
18415 format %{ "xorpd $dst, [0x8000000000000000]\t"
18416 "# neg double by sign flipping" %}
18417 ins_encode %{
18418 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18419 %}
18420 ins_pipe(pipe_slow);
18421 %}
18422
18423 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18424 predicate(UseAVX > 0);
18425 match(Set dst (NegD src));
18426 ins_cost(150);
18427 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18428 "# neg double by sign flipping" %}
18429 ins_encode %{
18430 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18431 ExternalAddress(double_signflip()));
18432 %}
18433 ins_pipe(pipe_slow);
18434 %}
18435
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
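// (sqrtss writes only the low 32 bits of dst, leaving bits 127:32 untouched,
// so dst carries a dependency on its previous contents; with dst == src that
// previous value is the input, which is needed anyway. The same partial-write
// hazard applies to sqrtsd below.)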
18438 instruct sqrtF_reg(regF dst) %{
18439 match(Set dst (SqrtF dst));
18440 format %{ "sqrtss $dst, $dst" %}
18441 ins_encode %{
18442 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18443 %}
18444 ins_pipe(pipe_slow);
18445 %}
18446
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
18449 instruct sqrtD_reg(regD dst) %{
18450 match(Set dst (SqrtD dst));
18451 format %{ "sqrtsd $dst, $dst" %}
18452 ins_encode %{
18453 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18454 %}
18455 ins_pipe(pipe_slow);
18456 %}
18457
18458 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18459 effect(TEMP tmp);
18460 match(Set dst (ConvF2HF src));
18461 ins_cost(125);
18462 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18463 ins_encode %{
18464 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18465 %}
18466 ins_pipe( pipe_slow );
18467 %}
18468
18469 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18470 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18471 effect(TEMP ktmp, TEMP rtmp);
18472 match(Set mem (StoreC mem (ConvF2HF src)));
18473 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18474 ins_encode %{
18475 __ movl($rtmp$$Register, 0x1);
18476 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18477 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18478 %}
18479 ins_pipe( pipe_slow );
18480 %}
18481
18482 instruct vconvF2HF(vec dst, vec src) %{
18483 match(Set dst (VectorCastF2HF src));
18484 format %{ "vector_conv_F2HF $dst $src" %}
18485 ins_encode %{
18486 int vlen_enc = vector_length_encoding(this, $src);
18487 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18488 %}
18489 ins_pipe( pipe_slow );
18490 %}
18491
18492 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18493 predicate(n->as_StoreVector()->memory_size() >= 16);
18494 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18495 format %{ "vcvtps2ph $mem,$src" %}
18496 ins_encode %{
18497 int vlen_enc = vector_length_encoding(this, $src);
18498 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18499 %}
18500 ins_pipe( pipe_slow );
18501 %}
18502
18503 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18504 match(Set dst (ConvHF2F src));
18505 format %{ "vcvtph2ps $dst,$src" %}
18506 ins_encode %{
18507 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18508 %}
18509 ins_pipe( pipe_slow );
18510 %}
18511
18512 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18513 match(Set dst (VectorCastHF2F (LoadVector mem)));
18514 format %{ "vcvtph2ps $dst,$mem" %}
18515 ins_encode %{
18516 int vlen_enc = vector_length_encoding(this);
18517 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18518 %}
18519 ins_pipe( pipe_slow );
18520 %}
18521
18522 instruct vconvHF2F(vec dst, vec src) %{
18523 match(Set dst (VectorCastHF2F src));
18524 ins_cost(125);
18525 format %{ "vector_conv_HF2F $dst,$src" %}
18526 ins_encode %{
18527 int vlen_enc = vector_length_encoding(this);
18528 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18529 %}
18530 ins_pipe( pipe_slow );
18531 %}
18532
18533 // ---------------------------------------- VectorReinterpret ------------------------------------
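// VectorReinterpret is a bitwise cast. When src and dst are the same size it
// is a no-op (empty encodings below); when dst is wider, the bytes above the
// src size must be zeroed (the expand forms mask or move accordingly); when
// dst is narrower, the upper bytes are simply dropped (the shrink form).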
18534 instruct reinterpret_mask(kReg dst) %{
18535 predicate(n->bottom_type()->isa_vectmask() &&
18536 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18537 match(Set dst (VectorReinterpret dst));
18538 ins_cost(125);
18539 format %{ "vector_reinterpret $dst\t!" %}
18540 ins_encode %{
18541 // empty
18542 %}
18543 ins_pipe( pipe_slow );
18544 %}
18545
18546 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18547 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18548 n->bottom_type()->isa_vectmask() &&
18549 n->in(1)->bottom_type()->isa_vectmask() &&
18550 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18551 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18552 match(Set dst (VectorReinterpret src));
18553 effect(TEMP xtmp);
18554 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18555 ins_encode %{
18556 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18557 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
18559 int vlen_enc = vector_length_encoding(src_sz);
18560 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18561 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18562 %}
18563 ins_pipe( pipe_slow );
18564 %}
18565
18566 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18567 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18568 n->bottom_type()->isa_vectmask() &&
18569 n->in(1)->bottom_type()->isa_vectmask() &&
18570 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18571 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18572 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18573 match(Set dst (VectorReinterpret src));
18574 effect(TEMP xtmp);
18575 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18576 ins_encode %{
18577 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18578 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
18580 int vlen_enc = vector_length_encoding(src_sz);
18581 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18582 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18583 %}
18584 ins_pipe( pipe_slow );
18585 %}
18586
18587 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18588 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18589 n->bottom_type()->isa_vectmask() &&
18590 n->in(1)->bottom_type()->isa_vectmask() &&
18591 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18592 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18593 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18594 match(Set dst (VectorReinterpret src));
18595 effect(TEMP xtmp);
18596 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18597 ins_encode %{
18598 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18599 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
18601 int vlen_enc = vector_length_encoding(src_sz);
18602 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18603 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18604 %}
18605 ins_pipe( pipe_slow );
18606 %}
18607
18608 instruct reinterpret(vec dst) %{
18609 predicate(!n->bottom_type()->isa_vectmask() &&
18610 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18611 match(Set dst (VectorReinterpret dst));
18612 ins_cost(125);
18613 format %{ "vector_reinterpret $dst\t!" %}
18614 ins_encode %{
18615 // empty
18616 %}
18617 ins_pipe( pipe_slow );
18618 %}
18619
18620 instruct reinterpret_expand(vec dst, vec src) %{
18621 predicate(UseAVX == 0 &&
18622 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18623 match(Set dst (VectorReinterpret src));
18624 ins_cost(125);
18625 effect(TEMP dst);
18626 format %{ "vector_reinterpret_expand $dst,$src" %}
18627 ins_encode %{
18628 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18629 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18630
18631 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18632 if (src_vlen_in_bytes == 4) {
18633 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18634 } else {
18635 assert(src_vlen_in_bytes == 8, "");
18636 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18637 }
18638 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18639 %}
18640 ins_pipe( pipe_slow );
18641 %}
18642
18643 instruct vreinterpret_expand4(legVec dst, vec src) %{
18644 predicate(UseAVX > 0 &&
18645 !n->bottom_type()->isa_vectmask() &&
18646 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18647 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18648 match(Set dst (VectorReinterpret src));
18649 ins_cost(125);
18650 format %{ "vector_reinterpret_expand $dst,$src" %}
18651 ins_encode %{
18652 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18653 %}
18654 ins_pipe( pipe_slow );
18655 %}
18656
18657
18658 instruct vreinterpret_expand(legVec dst, vec src) %{
18659 predicate(UseAVX > 0 &&
18660 !n->bottom_type()->isa_vectmask() &&
18661 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18662 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18663 match(Set dst (VectorReinterpret src));
18664 ins_cost(125);
18665 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18666 ins_encode %{
18667 switch (Matcher::vector_length_in_bytes(this, $src)) {
18668 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18669 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18670 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18671 default: ShouldNotReachHere();
18672 }
18673 %}
18674 ins_pipe( pipe_slow );
18675 %}
18676
18677 instruct reinterpret_shrink(vec dst, legVec src) %{
18678 predicate(!n->bottom_type()->isa_vectmask() &&
18679 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18680 match(Set dst (VectorReinterpret src));
18681 ins_cost(125);
18682 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18683 ins_encode %{
18684 switch (Matcher::vector_length_in_bytes(this)) {
18685 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18686 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18687 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18688 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18689 default: ShouldNotReachHere();
18690 }
18691 %}
18692 ins_pipe( pipe_slow );
18693 %}
18694
18695 // ----------------------------------------------------------------------------------------------------
18696
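// RoundDoubleMode rounds according to the rmode immediate (nearest / floor /
// ceil, as used by the Math.rint/floor/ceil intrinsics). Note the pxor in
// the register form below: roundsd writes only the low 64 bits of dst, so
// when dst != src the destination is cleared first to break the false
// dependency on its previous contents.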
18697 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18698 match(Set dst (RoundDoubleMode src rmode));
18699 format %{ "roundsd $dst,$src" %}
18700 ins_cost(150);
18701 ins_encode %{
18702 assert(UseSSE >= 4, "required");
18703 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18704 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18705 }
18706 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18707 %}
18708 ins_pipe(pipe_slow);
18709 %}
18710
18711 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18712 match(Set dst (RoundDoubleMode con rmode));
18713 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18714 ins_cost(150);
18715 ins_encode %{
18716 assert(UseSSE >= 4, "required");
18717 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18718 %}
18719 ins_pipe(pipe_slow);
18720 %}
18721
18722 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18723 predicate(Matcher::vector_length(n) < 8);
18724 match(Set dst (RoundDoubleModeV src rmode));
18725 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18726 ins_encode %{
18727 assert(UseAVX > 0, "required");
18728 int vlen_enc = vector_length_encoding(this);
18729 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18730 %}
18731 ins_pipe( pipe_slow );
18732 %}
18733
18734 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18735 predicate(Matcher::vector_length(n) == 8);
18736 match(Set dst (RoundDoubleModeV src rmode));
18737 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18738 ins_encode %{
18739 assert(UseAVX > 2, "required");
18740 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18741 %}
18742 ins_pipe( pipe_slow );
18743 %}
18744
18745 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18746 predicate(Matcher::vector_length(n) < 8);
18747 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18748 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18749 ins_encode %{
18750 assert(UseAVX > 0, "required");
18751 int vlen_enc = vector_length_encoding(this);
18752 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18753 %}
18754 ins_pipe( pipe_slow );
18755 %}
18756
18757 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18758 predicate(Matcher::vector_length(n) == 8);
18759 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18760 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18761 ins_encode %{
18762 assert(UseAVX > 2, "required");
18763 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18764 %}
18765 ins_pipe( pipe_slow );
18766 %}
18767
18768 instruct onspinwait() %{
18769 match(OnSpinWait);
18770 ins_cost(200);
18771
18772 format %{
18773 $$template
18774 $$emit$$"pause\t! membar_onspinwait"
18775 %}
18776 ins_encode %{
18777 __ pause();
18778 %}
18779 ins_pipe(pipe_slow);
18780 %}
18781
18782 // a * b + c
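// Note that the match rule ties the result to the addend: $c is both an input
// and the destination, which maps onto the FMA forms that accumulate into
// their destination register.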
18783 instruct fmaD_reg(regD a, regD b, regD c) %{
18784 match(Set c (FmaD c (Binary a b)));
18785 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18786 ins_cost(150);
18787 ins_encode %{
18788 assert(UseFMA, "Needs FMA instructions support.");
18789 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18790 %}
18791 ins_pipe( pipe_slow );
18792 %}
18793
18794 // a * b + c
18795 instruct fmaF_reg(regF a, regF b, regF c) %{
18796 match(Set c (FmaF c (Binary a b)));
18797 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18798 ins_cost(150);
18799 ins_encode %{
18800 assert(UseFMA, "Needs FMA instructions support.");
18801 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18802 %}
18803 ins_pipe( pipe_slow );
18804 %}
18805
18806 // ====================VECTOR INSTRUCTIONS=====================================
18807
18808 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
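// (legVec is restricted to the legacy XMM0-XMM15 file, while vec may also use
// the extended EVEX registers where available; these pseudo-moves exist only
// so the allocator can convert between the two register classes.)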
18809 instruct MoveVec2Leg(legVec dst, vec src) %{
18810 match(Set dst src);
18811 format %{ "" %}
18812 ins_encode %{
18813 ShouldNotReachHere();
18814 %}
18815 ins_pipe( fpu_reg_reg );
18816 %}
18817
18818 instruct MoveLeg2Vec(vec dst, legVec src) %{
18819 match(Set dst src);
18820 format %{ "" %}
18821 ins_encode %{
18822 ShouldNotReachHere();
18823 %}
18824 ins_pipe( fpu_reg_reg );
18825 %}
18826
18827 // ============================================================================
18828
// Generic load-vector operand pattern
18830 instruct loadV(vec dst, memory mem) %{
18831 match(Set dst (LoadVector mem));
18832 ins_cost(125);
18833 format %{ "load_vector $dst,$mem" %}
18834 ins_encode %{
18835 BasicType bt = Matcher::vector_element_basic_type(this);
18836 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18837 %}
18838 ins_pipe( pipe_slow );
18839 %}
18840
// Generic store-vector operand pattern.
18842 instruct storeV(memory mem, vec src) %{
18843 match(Set mem (StoreVector mem src));
18844 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18846 ins_encode %{
18847 switch (Matcher::vector_length_in_bytes(this, $src)) {
18848 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18849 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18850 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18851 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18852 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18853 default: ShouldNotReachHere();
18854 }
18855 %}
18856 ins_pipe( pipe_slow );
18857 %}
18858
18859 // ---------------------------------------- Gather ------------------------------------
18860
18861 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18862
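// Conceptually, a gather performs a per-lane indexed load, roughly:
//   for (int i = 0; i < num_lanes; i++) dst[i] = base[idx[i]];
// The rules below materialize 'base' with lea and emit vgather/evgather.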
18863 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18864 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18865 Matcher::vector_length_in_bytes(n) <= 32);
18866 match(Set dst (LoadVectorGather mem idx));
18867 effect(TEMP dst, TEMP tmp, TEMP mask);
18868 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18869 ins_encode %{
18870 int vlen_enc = vector_length_encoding(this);
18871 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18872 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18873 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18874 __ lea($tmp$$Register, $mem$$Address);
18875 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18876 %}
18877 ins_pipe( pipe_slow );
18878 %}
18879
18880
18881 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18882 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18883 !is_subword_type(Matcher::vector_element_basic_type(n)));
18884 match(Set dst (LoadVectorGather mem idx));
18885 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18887 ins_encode %{
18888 int vlen_enc = vector_length_encoding(this);
18889 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18890 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18891 __ lea($tmp$$Register, $mem$$Address);
18892 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18893 %}
18894 ins_pipe( pipe_slow );
18895 %}
18896
18897 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18898 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18899 !is_subword_type(Matcher::vector_element_basic_type(n)));
18900 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18901 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18903 ins_encode %{
18904 assert(UseAVX > 2, "sanity");
18905 int vlen_enc = vector_length_encoding(this);
18906 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18907 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first copied to a temporary.
18910 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18911 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18912 __ lea($tmp$$Register, $mem$$Address);
18913 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18914 %}
18915 ins_pipe( pipe_slow );
18916 %}
18917
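// x86 has no hardware gather for byte/short elements, so subword gathers are
// emulated with scalar loads: vectors of at most 8 bytes go through a single
// vgather8b sequence, larger vectors are assembled piecewise via vgather_subword.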
18918 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18919 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18920 match(Set dst (LoadVectorGather mem idx_base));
18921 effect(TEMP tmp, TEMP rtmp);
18922 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18923 ins_encode %{
18924 int vlen_enc = vector_length_encoding(this);
18925 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18926 __ lea($tmp$$Register, $mem$$Address);
18927 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18928 %}
18929 ins_pipe( pipe_slow );
18930 %}
18931
18932 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18933 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18934 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18935 match(Set dst (LoadVectorGather mem idx_base));
18936 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18937 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18938 ins_encode %{
18939 int vlen_enc = vector_length_encoding(this);
18940 int vector_len = Matcher::vector_length(this);
18941 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18942 __ lea($tmp$$Register, $mem$$Address);
18943 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18944 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18945 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18946 %}
18947 ins_pipe( pipe_slow );
18948 %}
18949
18950 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18951 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18952 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18953 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18954 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18955 ins_encode %{
18956 int vlen_enc = vector_length_encoding(this);
18957 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18958 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18959 __ lea($tmp$$Register, $mem$$Address);
18960 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18961 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18962 %}
18963 ins_pipe( pipe_slow );
18964 %}
18965
18966 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18967 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18968 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18969 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18970 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18971 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18972 ins_encode %{
18973 int vlen_enc = vector_length_encoding(this);
18974 int vector_len = Matcher::vector_length(this);
18975 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18976 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18977 __ lea($tmp$$Register, $mem$$Address);
18978 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18979 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18980 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18981 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18982 %}
18983 ins_pipe( pipe_slow );
18984 %}
18985
18986 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18987 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18988 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18989 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18990 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18991 ins_encode %{
18992 int vlen_enc = vector_length_encoding(this);
18993 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18994 __ lea($tmp$$Register, $mem$$Address);
18995 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18996 if (elem_bt == T_SHORT) {
18997 __ movl($mask_idx$$Register, 0x55555555);
18998 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18999 }
19000 __ xorl($mask_idx$$Register, $mask_idx$$Register);
19001 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
19002 %}
19003 ins_pipe( pipe_slow );
19004 %}
19005
19006 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
19007 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
19008 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
19009 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
19010 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
19011 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
19012 ins_encode %{
19013 int vlen_enc = vector_length_encoding(this);
19014 int vector_len = Matcher::vector_length(this);
19015 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19016 __ lea($tmp$$Register, $mem$$Address);
19017 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
19018 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
19019 if (elem_bt == T_SHORT) {
19020 __ movl($mask_idx$$Register, 0x55555555);
19021 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
19022 }
19023 __ xorl($mask_idx$$Register, $mask_idx$$Register);
19024 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
19025 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
19026 %}
19027 ins_pipe( pipe_slow );
19028 %}
19029
19030 // ====================Scatter=======================================
19031
19032 // Scatter INT, LONG, FLOAT, DOUBLE
19033
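// Conceptually, a scatter performs a per-lane indexed store, roughly:
//   for (int i = 0; i < num_lanes; i++) base[idx[i]] = src[i];
// Scatters exist only in AVX-512, hence the UseAVX > 2 predicate below.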
19034 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
19035 predicate(UseAVX > 2);
19036 match(Set mem (StoreVectorScatter mem (Binary src idx)));
19037 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
19039 ins_encode %{
19040 int vlen_enc = vector_length_encoding(this, $src);
19041 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19042
19043 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19044 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19045
19046 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
19047 __ lea($tmp$$Register, $mem$$Address);
19048 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19049 %}
19050 ins_pipe( pipe_slow );
19051 %}
19052
19053 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19054 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19055 effect(TEMP tmp, TEMP ktmp);
19056 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19057 ins_encode %{
19058 int vlen_enc = vector_length_encoding(this, $src);
19059 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19060 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19061 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is first copied to a temporary.
19064 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19065 __ lea($tmp$$Register, $mem$$Address);
19066 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19067 %}
19068 ins_pipe( pipe_slow );
19069 %}
19070
19071 // ====================REPLICATE=======================================
19072
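// Replicate broadcasts a scalar into every lane of the vector: dst[i] = src.
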
19073 // Replicate byte scalar to be vector
19074 instruct vReplB_reg(vec dst, rRegI src) %{
19075 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19076 match(Set dst (Replicate src));
19077 format %{ "replicateB $dst,$src" %}
19078 ins_encode %{
19079 uint vlen = Matcher::vector_length(this);
19080 if (UseAVX >= 2) {
19081 int vlen_enc = vector_length_encoding(this);
19082 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19083 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19084 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19085 } else {
19086 __ movdl($dst$$XMMRegister, $src$$Register);
19087 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19088 }
19089 } else {
19090 assert(UseAVX < 2, "");
19091 __ movdl($dst$$XMMRegister, $src$$Register);
19092 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19093 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19094 if (vlen >= 16) {
19095 assert(vlen == 16, "");
19096 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19097 }
19098 }
19099 %}
19100 ins_pipe( pipe_slow );
19101 %}
19102
19103 instruct ReplB_mem(vec dst, memory mem) %{
19104 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19105 match(Set dst (Replicate (LoadB mem)));
19106 format %{ "replicateB $dst,$mem" %}
19107 ins_encode %{
19108 int vlen_enc = vector_length_encoding(this);
19109 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19110 %}
19111 ins_pipe( pipe_slow );
19112 %}
19113
19114 // ====================ReplicateS=======================================
19115
19116 instruct vReplS_reg(vec dst, rRegI src) %{
19117 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19118 match(Set dst (Replicate src));
19119 format %{ "replicateS $dst,$src" %}
19120 ins_encode %{
19121 uint vlen = Matcher::vector_length(this);
19122 int vlen_enc = vector_length_encoding(this);
19123 if (UseAVX >= 2) {
19124 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19125 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19126 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19127 } else {
19128 __ movdl($dst$$XMMRegister, $src$$Register);
19129 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19130 }
19131 } else {
19132 assert(UseAVX < 2, "");
19133 __ movdl($dst$$XMMRegister, $src$$Register);
19134 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19135 if (vlen >= 8) {
19136 assert(vlen == 8, "");
19137 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19138 }
19139 }
19140 %}
19141 ins_pipe( pipe_slow );
19142 %}
19143
19144 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19145 match(Set dst (Replicate con));
19146 effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con\t! using $rtmp as TEMP" %}
19148 ins_encode %{
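    // The half-float constant's 16-bit pattern travels through a GPR and is
    // then broadcast into every 16-bit lane.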
19149 int vlen_enc = vector_length_encoding(this);
19150 BasicType bt = Matcher::vector_element_basic_type(this);
19151 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19152 __ movl($rtmp$$Register, $con$$constant);
19153 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19154 %}
19155 ins_pipe( pipe_slow );
19156 %}
19157
19158 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19159 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19160 match(Set dst (Replicate src));
19161 effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src\t! using $rtmp as TEMP" %}
19163 ins_encode %{
19164 int vlen_enc = vector_length_encoding(this);
19165 __ evmovw($rtmp$$Register, $src$$XMMRegister);
19166 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19167 %}
19168 ins_pipe( pipe_slow );
19169 %}
19170
19171 instruct ReplS_mem(vec dst, memory mem) %{
19172 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19173 match(Set dst (Replicate (LoadS mem)));
19174 format %{ "replicateS $dst,$mem" %}
19175 ins_encode %{
19176 int vlen_enc = vector_length_encoding(this);
19177 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19178 %}
19179 ins_pipe( pipe_slow );
19180 %}
19181
19182 // ====================ReplicateI=======================================
19183
19184 instruct ReplI_reg(vec dst, rRegI src) %{
19185 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19186 match(Set dst (Replicate src));
19187 format %{ "replicateI $dst,$src" %}
19188 ins_encode %{
19189 uint vlen = Matcher::vector_length(this);
19190 int vlen_enc = vector_length_encoding(this);
19191 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19192 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19193 } else if (VM_Version::supports_avx2()) {
19194 __ movdl($dst$$XMMRegister, $src$$Register);
19195 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19196 } else {
19197 __ movdl($dst$$XMMRegister, $src$$Register);
19198 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19199 }
19200 %}
19201 ins_pipe( pipe_slow );
19202 %}
19203
19204 instruct ReplI_mem(vec dst, memory mem) %{
19205 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19206 match(Set dst (Replicate (LoadI mem)));
19207 format %{ "replicateI $dst,$mem" %}
19208 ins_encode %{
19209 int vlen_enc = vector_length_encoding(this);
19210 if (VM_Version::supports_avx2()) {
19211 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19212 } else if (VM_Version::supports_avx()) {
19213 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19214 } else {
19215 __ movdl($dst$$XMMRegister, $mem$$Address);
19216 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19217 }
19218 %}
19219 ins_pipe( pipe_slow );
19220 %}
19221
19222 instruct ReplI_imm(vec dst, immI con) %{
19223 predicate(Matcher::is_non_long_integral_vector(n));
19224 match(Set dst (Replicate con));
19225 format %{ "replicateI $dst,$con" %}
19226 ins_encode %{
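    // The constant-table entry is only as wide as the cheapest available
    // broadcast needs: 4 bytes with AVX, 8 bytes with SSE3 (movddup),
    // otherwise a full 16 bytes; dividing by the element size turns that
    // byte count into the replication count passed to vreplicate_imm.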
19227 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19228 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19229 type2aelembytes(Matcher::vector_element_basic_type(this))));
19230 BasicType bt = Matcher::vector_element_basic_type(this);
19231 int vlen = Matcher::vector_length_in_bytes(this);
19232 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19233 %}
19234 ins_pipe( pipe_slow );
19235 %}
19236
19237 // Replicate scalar zero to be vector
19238 instruct ReplI_zero(vec dst, immI_0 zero) %{
19239 predicate(Matcher::is_non_long_integral_vector(n));
19240 match(Set dst (Replicate zero));
19241 format %{ "replicateI $dst,$zero" %}
19242 ins_encode %{
19243 int vlen_enc = vector_length_encoding(this);
19244 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19245 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19246 } else {
19247 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19248 }
19249 %}
19250 ins_pipe( fpu_reg_reg );
19251 %}
19252
19253 instruct ReplI_M1(vec dst, immI_M1 con) %{
19254 predicate(Matcher::is_non_long_integral_vector(n));
19255 match(Set dst (Replicate con));
19256 format %{ "vallones $dst" %}
19257 ins_encode %{
19258 int vector_len = vector_length_encoding(this);
19259 __ vallones($dst$$XMMRegister, vector_len);
19260 %}
19261 ins_pipe( pipe_slow );
19262 %}
19263
19264 // ====================ReplicateL=======================================
19265
19266 // Replicate long (8 byte) scalar to be vector
19267 instruct ReplL_reg(vec dst, rRegL src) %{
19268 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19269 match(Set dst (Replicate src));
19270 format %{ "replicateL $dst,$src" %}
19271 ins_encode %{
19272 int vlen = Matcher::vector_length(this);
19273 int vlen_enc = vector_length_encoding(this);
19274 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19275 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19276 } else if (VM_Version::supports_avx2()) {
19277 __ movdq($dst$$XMMRegister, $src$$Register);
19278 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19279 } else {
19280 __ movdq($dst$$XMMRegister, $src$$Register);
19281 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19282 }
19283 %}
19284 ins_pipe( pipe_slow );
19285 %}
19286
19287 instruct ReplL_mem(vec dst, memory mem) %{
19288 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19289 match(Set dst (Replicate (LoadL mem)));
19290 format %{ "replicateL $dst,$mem" %}
19291 ins_encode %{
19292 int vlen_enc = vector_length_encoding(this);
19293 if (VM_Version::supports_avx2()) {
19294 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19295 } else if (VM_Version::supports_sse3()) {
19296 __ movddup($dst$$XMMRegister, $mem$$Address);
19297 } else {
19298 __ movq($dst$$XMMRegister, $mem$$Address);
19299 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19300 }
19301 %}
19302 ins_pipe( pipe_slow );
19303 %}
19304
19305 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19306 instruct ReplL_imm(vec dst, immL con) %{
19307 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19308 match(Set dst (Replicate con));
19309 format %{ "replicateL $dst,$con" %}
19310 ins_encode %{
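    // One 8-byte copy suffices when SSE3 movddup can broadcast it; without
    // SSE3 the table entry holds two copies, i.e. a full 16 bytes.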
19311 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19312 int vlen = Matcher::vector_length_in_bytes(this);
19313 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19314 %}
19315 ins_pipe( pipe_slow );
19316 %}
19317
19318 instruct ReplL_zero(vec dst, immL0 zero) %{
19319 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19320 match(Set dst (Replicate zero));
19321 format %{ "replicateL $dst,$zero" %}
19322 ins_encode %{
19323 int vlen_enc = vector_length_encoding(this);
19324 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19325 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19326 } else {
19327 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19328 }
19329 %}
19330 ins_pipe( fpu_reg_reg );
19331 %}
19332
19333 instruct ReplL_M1(vec dst, immL_M1 con) %{
19334 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19335 match(Set dst (Replicate con));
19336 format %{ "vallones $dst" %}
19337 ins_encode %{
19338 int vector_len = vector_length_encoding(this);
19339 __ vallones($dst$$XMMRegister, vector_len);
19340 %}
19341 ins_pipe( pipe_slow );
19342 %}
19343
19344 // ====================ReplicateF=======================================
19345
19346 instruct vReplF_reg(vec dst, vlRegF src) %{
19347 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19348 match(Set dst (Replicate src));
19349 format %{ "replicateF $dst,$src" %}
19350 ins_encode %{
19351 uint vlen = Matcher::vector_length(this);
19352 int vlen_enc = vector_length_encoding(this);
19353 if (vlen <= 4) {
19354 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19355 } else if (VM_Version::supports_avx2()) {
19356 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19357 } else {
19358 assert(vlen == 8, "sanity");
19359 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19360 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19361 }
19362 %}
19363 ins_pipe( pipe_slow );
19364 %}
19365
19366 instruct ReplF_reg(vec dst, vlRegF src) %{
19367 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19368 match(Set dst (Replicate src));
19369 format %{ "replicateF $dst,$src" %}
19370 ins_encode %{
19371 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19372 %}
19373 ins_pipe( pipe_slow );
19374 %}
19375
19376 instruct ReplF_mem(vec dst, memory mem) %{
19377 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19378 match(Set dst (Replicate (LoadF mem)));
19379 format %{ "replicateF $dst,$mem" %}
19380 ins_encode %{
19381 int vlen_enc = vector_length_encoding(this);
19382 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19383 %}
19384 ins_pipe( pipe_slow );
19385 %}
19386
19387 // Replicate float scalar immediate to be vector by loading from const table.
19388 instruct ReplF_imm(vec dst, immF con) %{
19389 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19390 match(Set dst (Replicate con));
19391 format %{ "replicateF $dst,$con" %}
19392 ins_encode %{
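    // Table entry size mirrors the cheapest broadcast: one 4-byte copy with
    // AVX (vbroadcastss), two copies for SSE3 movddup, otherwise four.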
19393 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19394 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19395 int vlen = Matcher::vector_length_in_bytes(this);
19396 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19397 %}
19398 ins_pipe( pipe_slow );
19399 %}
19400
19401 instruct ReplF_zero(vec dst, immF0 zero) %{
19402 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19403 match(Set dst (Replicate zero));
19404 format %{ "replicateF $dst,$zero" %}
19405 ins_encode %{
19406 int vlen_enc = vector_length_encoding(this);
19407 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19408 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19409 } else {
19410 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19411 }
19412 %}
19413 ins_pipe( fpu_reg_reg );
19414 %}
19415
19416 // ====================ReplicateD=======================================
19417
// Replicate double (8 byte) scalar to be vector
19419 instruct vReplD_reg(vec dst, vlRegD src) %{
19420 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19421 match(Set dst (Replicate src));
19422 format %{ "replicateD $dst,$src" %}
19423 ins_encode %{
19424 uint vlen = Matcher::vector_length(this);
19425 int vlen_enc = vector_length_encoding(this);
19426 if (vlen <= 2) {
19427 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19428 } else if (VM_Version::supports_avx2()) {
19429 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19430 } else {
19431 assert(vlen == 4, "sanity");
19432 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19433 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19434 }
19435 %}
19436 ins_pipe( pipe_slow );
19437 %}
19438
19439 instruct ReplD_reg(vec dst, vlRegD src) %{
19440 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19441 match(Set dst (Replicate src));
19442 format %{ "replicateD $dst,$src" %}
19443 ins_encode %{
19444 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19445 %}
19446 ins_pipe( pipe_slow );
19447 %}
19448
19449 instruct ReplD_mem(vec dst, memory mem) %{
19450 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19451 match(Set dst (Replicate (LoadD mem)));
19452 format %{ "replicateD $dst,$mem" %}
19453 ins_encode %{
19454 if (Matcher::vector_length(this) >= 4) {
19455 int vlen_enc = vector_length_encoding(this);
19456 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19457 } else {
19458 __ movddup($dst$$XMMRegister, $mem$$Address);
19459 }
19460 %}
19461 ins_pipe( pipe_slow );
19462 %}
19463
19464 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19465 instruct ReplD_imm(vec dst, immD con) %{
19466 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19467 match(Set dst (Replicate con));
19468 format %{ "replicateD $dst,$con" %}
19469 ins_encode %{
19470 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19471 int vlen = Matcher::vector_length_in_bytes(this);
19472 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19473 %}
19474 ins_pipe( pipe_slow );
19475 %}
19476
19477 instruct ReplD_zero(vec dst, immD0 zero) %{
19478 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19479 match(Set dst (Replicate zero));
19480 format %{ "replicateD $dst,$zero" %}
19481 ins_encode %{
19482 int vlen_enc = vector_length_encoding(this);
19483 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19484 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19485 } else {
19486 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19487 }
19488 %}
19489 ins_pipe( fpu_reg_reg );
19490 %}
19491
19492 // ====================VECTOR INSERT=======================================
19493
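// VectorInsert yields a copy of the source vector with a single lane replaced:
// dst = src, then dst[idx] = val, where idx is a compile-time constant.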
19494 instruct insert(vec dst, rRegI val, immU8 idx) %{
19495 predicate(Matcher::vector_length_in_bytes(n) < 32);
19496 match(Set dst (VectorInsert (Binary dst val) idx));
19497 format %{ "vector_insert $dst,$val,$idx" %}
19498 ins_encode %{
19499 assert(UseSSE >= 4, "required");
19500 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19501
19502 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19503
19504 assert(is_integral_type(elem_bt), "");
19505 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19506
19507 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19508 %}
19509 ins_pipe( pipe_slow );
19510 %}
19511
19512 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19513 predicate(Matcher::vector_length_in_bytes(n) == 32);
19514 match(Set dst (VectorInsert (Binary src val) idx));
19515 effect(TEMP vtmp);
19516 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19517 ins_encode %{
19518 int vlen_enc = Assembler::AVX_256bit;
19519 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19520 int elem_per_lane = 16/type2aelembytes(elem_bt);
19521 int log2epr = log2(elem_per_lane);
19522
19523 assert(is_integral_type(elem_bt), "sanity");
19524 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19525
19526 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19527 uint y_idx = ($idx$$constant >> log2epr) & 1;
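    // Extract the 128-bit lane holding element $idx, insert the scalar at its
    // within-lane position, then write the lane back into the destination.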
19528 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19529 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19530 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19531 %}
19532 ins_pipe( pipe_slow );
19533 %}
19534
19535 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19536 predicate(Matcher::vector_length_in_bytes(n) == 64);
19537 match(Set dst (VectorInsert (Binary src val) idx));
19538 effect(TEMP vtmp);
19539 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19540 ins_encode %{
19541 assert(UseAVX > 2, "sanity");
19542
19543 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19544 int elem_per_lane = 16/type2aelembytes(elem_bt);
19545 int log2epr = log2(elem_per_lane);
19546
19547 assert(is_integral_type(elem_bt), "");
19548 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19549
19550 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19551 uint y_idx = ($idx$$constant >> log2epr) & 3;
19552 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19553 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19554 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19555 %}
19556 ins_pipe( pipe_slow );
19557 %}
19558
19559 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19560 predicate(Matcher::vector_length(n) == 2);
19561 match(Set dst (VectorInsert (Binary dst val) idx));
19562 format %{ "vector_insert $dst,$val,$idx" %}
19563 ins_encode %{
19564 assert(UseSSE >= 4, "required");
19565 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19566 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19567
19568 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19569 %}
19570 ins_pipe( pipe_slow );
19571 %}
19572
19573 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19574 predicate(Matcher::vector_length(n) == 4);
19575 match(Set dst (VectorInsert (Binary src val) idx));
19576 effect(TEMP vtmp);
19577 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19578 ins_encode %{
19579 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19580 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19581
19582 uint x_idx = $idx$$constant & right_n_bits(1);
19583 uint y_idx = ($idx$$constant >> 1) & 1;
19584 int vlen_enc = Assembler::AVX_256bit;
19585 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19586 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19587 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19588 %}
19589 ins_pipe( pipe_slow );
19590 %}
19591
19592 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19593 predicate(Matcher::vector_length(n) == 8);
19594 match(Set dst (VectorInsert (Binary src val) idx));
19595 effect(TEMP vtmp);
19596 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19597 ins_encode %{
19598 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19599 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19600
19601 uint x_idx = $idx$$constant & right_n_bits(1);
19602 uint y_idx = ($idx$$constant >> 1) & 3;
19603 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19604 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19605 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19606 %}
19607 ins_pipe( pipe_slow );
19608 %}
19609
19610 instruct insertF(vec dst, regF val, immU8 idx) %{
19611 predicate(Matcher::vector_length(n) < 8);
19612 match(Set dst (VectorInsert (Binary dst val) idx));
19613 format %{ "vector_insert $dst,$val,$idx" %}
19614 ins_encode %{
19615 assert(UseSSE >= 4, "sanity");
19616
19617 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19618 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19619
19620 uint x_idx = $idx$$constant & right_n_bits(2);
19621 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19622 %}
19623 ins_pipe( pipe_slow );
19624 %}
19625
19626 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19627 predicate(Matcher::vector_length(n) >= 8);
19628 match(Set dst (VectorInsert (Binary src val) idx));
19629 effect(TEMP vtmp);
19630 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19631 ins_encode %{
19632 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19633 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19634
19635 int vlen = Matcher::vector_length(this);
19636 uint x_idx = $idx$$constant & right_n_bits(2);
19637 if (vlen == 8) {
19638 uint y_idx = ($idx$$constant >> 2) & 1;
19639 int vlen_enc = Assembler::AVX_256bit;
19640 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19641 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19642 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19643 } else {
19644 assert(vlen == 16, "sanity");
19645 uint y_idx = ($idx$$constant >> 2) & 3;
19646 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19647 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19648 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19649 }
19650 %}
19651 ins_pipe( pipe_slow );
19652 %}
19653
19654 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19655 predicate(Matcher::vector_length(n) == 2);
19656 match(Set dst (VectorInsert (Binary dst val) idx));
19657 effect(TEMP tmp);
19658 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19659 ins_encode %{
19660 assert(UseSSE >= 4, "sanity");
19661 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19662 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19663
19664 __ movq($tmp$$Register, $val$$XMMRegister);
19665 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19666 %}
19667 ins_pipe( pipe_slow );
19668 %}
19669
19670 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19671 predicate(Matcher::vector_length(n) == 4);
19672 match(Set dst (VectorInsert (Binary src val) idx));
19673 effect(TEMP vtmp, TEMP tmp);
19674 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19675 ins_encode %{
19676 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19677 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19678
19679 uint x_idx = $idx$$constant & right_n_bits(1);
19680 uint y_idx = ($idx$$constant >> 1) & 1;
19681 int vlen_enc = Assembler::AVX_256bit;
19682 __ movq($tmp$$Register, $val$$XMMRegister);
19683 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19684 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19685 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19686 %}
19687 ins_pipe( pipe_slow );
19688 %}
19689
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19691 predicate(Matcher::vector_length(n) == 8);
19692 match(Set dst (VectorInsert (Binary src val) idx));
19693 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19695 ins_encode %{
19696 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19697 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19698
19699 uint x_idx = $idx$$constant & right_n_bits(1);
19700 uint y_idx = ($idx$$constant >> 1) & 3;
19701 __ movq($tmp$$Register, $val$$XMMRegister);
19702 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19703 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19704 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19705 %}
19706 ins_pipe( pipe_slow );
19707 %}
19708
19709 // ====================REDUCTION ARITHMETIC=======================================
19710
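// Reductions fold a vector into a scalar by combining a scalar input (src1)
// with every lane of the vector (src2):
//   dst = src1 OP src2[0] OP ... OP src2[n-1]
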
19711 // =======================Int Reduction==========================================
19712
19713 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19714 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19715 match(Set dst (AddReductionVI src1 src2));
19716 match(Set dst (MulReductionVI src1 src2));
19717 match(Set dst (AndReductionV src1 src2));
19718 match(Set dst ( OrReductionV src1 src2));
19719 match(Set dst (XorReductionV src1 src2));
19720 match(Set dst (MinReductionV src1 src2));
19721 match(Set dst (MaxReductionV src1 src2));
19722 match(Set dst (UMinReductionV src1 src2));
19723 match(Set dst (UMaxReductionV src1 src2));
19724 effect(TEMP vtmp1, TEMP vtmp2);
19725 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19726 ins_encode %{
19727 int opcode = this->ideal_Opcode();
19728 int vlen = Matcher::vector_length(this, $src2);
19729 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19730 %}
19731 ins_pipe( pipe_slow );
19732 %}
19733
19734 // =======================Long Reduction==========================================
19735
19736 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19737 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19738 match(Set dst (AddReductionVL src1 src2));
19739 match(Set dst (MulReductionVL src1 src2));
19740 match(Set dst (AndReductionV src1 src2));
19741 match(Set dst ( OrReductionV src1 src2));
19742 match(Set dst (XorReductionV src1 src2));
19743 match(Set dst (MinReductionV src1 src2));
19744 match(Set dst (MaxReductionV src1 src2));
19745 match(Set dst (UMinReductionV src1 src2));
19746 match(Set dst (UMaxReductionV src1 src2));
19747 effect(TEMP vtmp1, TEMP vtmp2);
19748 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19749 ins_encode %{
19750 int opcode = this->ideal_Opcode();
19751 int vlen = Matcher::vector_length(this, $src2);
19752 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19753 %}
19754 ins_pipe( pipe_slow );
19755 %}
19756
19757 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19758 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19759 match(Set dst (AddReductionVL src1 src2));
19760 match(Set dst (MulReductionVL src1 src2));
19761 match(Set dst (AndReductionV src1 src2));
19762 match(Set dst ( OrReductionV src1 src2));
19763 match(Set dst (XorReductionV src1 src2));
19764 match(Set dst (MinReductionV src1 src2));
19765 match(Set dst (MaxReductionV src1 src2));
19766 match(Set dst (UMinReductionV src1 src2));
19767 match(Set dst (UMaxReductionV src1 src2));
19768 effect(TEMP vtmp1, TEMP vtmp2);
19769 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19770 ins_encode %{
19771 int opcode = this->ideal_Opcode();
19772 int vlen = Matcher::vector_length(this, $src2);
19773 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19774 %}
19775 ins_pipe( pipe_slow );
19776 %}
19777
19778 // =======================Float Reduction==========================================
19779
19780 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19781 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19782 match(Set dst (AddReductionVF dst src));
19783 match(Set dst (MulReductionVF dst src));
19784 effect(TEMP dst, TEMP vtmp);
19785 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19786 ins_encode %{
19787 int opcode = this->ideal_Opcode();
19788 int vlen = Matcher::vector_length(this, $src);
19789 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19790 %}
19791 ins_pipe( pipe_slow );
19792 %}
19793
19794 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19795 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19796 match(Set dst (AddReductionVF dst src));
19797 match(Set dst (MulReductionVF dst src));
19798 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19799 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19800 ins_encode %{
19801 int opcode = this->ideal_Opcode();
19802 int vlen = Matcher::vector_length(this, $src);
19803 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19804 %}
19805 ins_pipe( pipe_slow );
19806 %}
19807
19808 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19809 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19810 match(Set dst (AddReductionVF dst src));
19811 match(Set dst (MulReductionVF dst src));
19812 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19813 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19814 ins_encode %{
19815 int opcode = this->ideal_Opcode();
19816 int vlen = Matcher::vector_length(this, $src);
19817 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19818 %}
19819 ins_pipe( pipe_slow );
19820 %}
19821
19822
19823 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19824 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19825 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19826 // src1 contains reduction identity
19827 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19828 match(Set dst (AddReductionVF src1 src2));
19829 match(Set dst (MulReductionVF src1 src2));
19830 effect(TEMP dst);
19831 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19832 ins_encode %{
19833 int opcode = this->ideal_Opcode();
19834 int vlen = Matcher::vector_length(this, $src2);
19835 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19836 %}
19837 ins_pipe( pipe_slow );
19838 %}
19839
19840 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19841 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19842 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19843 // src1 contains reduction identity
19844 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19845 match(Set dst (AddReductionVF src1 src2));
19846 match(Set dst (MulReductionVF src1 src2));
19847 effect(TEMP dst, TEMP vtmp);
19848 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19849 ins_encode %{
19850 int opcode = this->ideal_Opcode();
19851 int vlen = Matcher::vector_length(this, $src2);
19852 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19853 %}
19854 ins_pipe( pipe_slow );
19855 %}
19856
19857 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19858 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19859 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19860 // src1 contains reduction identity
19861 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19862 match(Set dst (AddReductionVF src1 src2));
19863 match(Set dst (MulReductionVF src1 src2));
19864 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19865 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19866 ins_encode %{
19867 int opcode = this->ideal_Opcode();
19868 int vlen = Matcher::vector_length(this, $src2);
19869 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19870 %}
19871 ins_pipe( pipe_slow );
19872 %}
19873
19874 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19875 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19876 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19877 // src1 contains reduction identity
19878 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19879 match(Set dst (AddReductionVF src1 src2));
19880 match(Set dst (MulReductionVF src1 src2));
19881 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19882 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19883 ins_encode %{
19884 int opcode = this->ideal_Opcode();
19885 int vlen = Matcher::vector_length(this, $src2);
19886 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19887 %}
19888 ins_pipe( pipe_slow );
19889 %}
19890
19891 // =======================Double Reduction==========================================
19892
19893 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19894 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19895 match(Set dst (AddReductionVD dst src));
19896 match(Set dst (MulReductionVD dst src));
19897 effect(TEMP dst, TEMP vtmp);
19898 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19899 ins_encode %{
19900 int opcode = this->ideal_Opcode();
19901 int vlen = Matcher::vector_length(this, $src);
19902 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19903 %}
19904 ins_pipe( pipe_slow );
19905 %}
19906
19907 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19908 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19909 match(Set dst (AddReductionVD dst src));
19910 match(Set dst (MulReductionVD dst src));
19911 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19912 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19913 ins_encode %{
19914 int opcode = this->ideal_Opcode();
19915 int vlen = Matcher::vector_length(this, $src);
19916 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19917 %}
19918 ins_pipe( pipe_slow );
19919 %}
19920
19921 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19922 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19923 match(Set dst (AddReductionVD dst src));
19924 match(Set dst (MulReductionVD dst src));
19925 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19926 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19927 ins_encode %{
19928 int opcode = this->ideal_Opcode();
19929 int vlen = Matcher::vector_length(this, $src);
19930 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19931 %}
19932 ins_pipe( pipe_slow );
19933 %}
19934
19935 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19936 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19937 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19938 // src1 contains reduction identity
19939 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19940 match(Set dst (AddReductionVD src1 src2));
19941 match(Set dst (MulReductionVD src1 src2));
19942 effect(TEMP dst);
19943 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19944 ins_encode %{
19945 int opcode = this->ideal_Opcode();
19946 int vlen = Matcher::vector_length(this, $src2);
19947 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19948 %}
19949 ins_pipe( pipe_slow );
19950 %}
19951
19952 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19953 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19954 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19955 // src1 contains reduction identity
19956 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19957 match(Set dst (AddReductionVD src1 src2));
19958 match(Set dst (MulReductionVD src1 src2));
19959 effect(TEMP dst, TEMP vtmp);
19960 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19961 ins_encode %{
19962 int opcode = this->ideal_Opcode();
19963 int vlen = Matcher::vector_length(this, $src2);
19964 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19965 %}
19966 ins_pipe( pipe_slow );
19967 %}
19968
19969 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19970 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19971 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19972 // src1 contains reduction identity
19973 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19974 match(Set dst (AddReductionVD src1 src2));
19975 match(Set dst (MulReductionVD src1 src2));
19976 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19977 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19978 ins_encode %{
19979 int opcode = this->ideal_Opcode();
19980 int vlen = Matcher::vector_length(this, $src2);
19981 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19982 %}
19983 ins_pipe( pipe_slow );
19984 %}
19985
19986 // =======================Byte Reduction==========================================
19987
19988 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19989 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19990 match(Set dst (AddReductionVI src1 src2));
19991 match(Set dst (AndReductionV src1 src2));
19992 match(Set dst ( OrReductionV src1 src2));
19993 match(Set dst (XorReductionV src1 src2));
19994 match(Set dst (MinReductionV src1 src2));
19995 match(Set dst (MaxReductionV src1 src2));
19996 match(Set dst (UMinReductionV src1 src2));
19997 match(Set dst (UMaxReductionV src1 src2));
19998 effect(TEMP vtmp1, TEMP vtmp2);
19999 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20000 ins_encode %{
20001 int opcode = this->ideal_Opcode();
20002 int vlen = Matcher::vector_length(this, $src2);
20003 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20004 %}
20005 ins_pipe( pipe_slow );
20006 %}
20007
20008 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20009 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
20010 match(Set dst (AddReductionVI src1 src2));
20011 match(Set dst (AndReductionV src1 src2));
20012 match(Set dst ( OrReductionV src1 src2));
20013 match(Set dst (XorReductionV src1 src2));
20014 match(Set dst (MinReductionV src1 src2));
20015 match(Set dst (MaxReductionV src1 src2));
20016 match(Set dst (UMinReductionV src1 src2));
20017 match(Set dst (UMaxReductionV src1 src2));
20018 effect(TEMP vtmp1, TEMP vtmp2);
20019 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20020 ins_encode %{
20021 int opcode = this->ideal_Opcode();
20022 int vlen = Matcher::vector_length(this, $src2);
20023 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20024 %}
20025 ins_pipe( pipe_slow );
20026 %}
20027
20028 // =======================Short Reduction==========================================
20029
20030 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20031 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
20032 match(Set dst (AddReductionVI src1 src2));
20033 match(Set dst (MulReductionVI src1 src2));
20034 match(Set dst (AndReductionV src1 src2));
20035 match(Set dst ( OrReductionV src1 src2));
20036 match(Set dst (XorReductionV src1 src2));
20037 match(Set dst (MinReductionV src1 src2));
20038 match(Set dst (MaxReductionV src1 src2));
20039 match(Set dst (UMinReductionV src1 src2));
20040 match(Set dst (UMaxReductionV src1 src2));
20041 effect(TEMP vtmp1, TEMP vtmp2);
20042 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20043 ins_encode %{
20044 int opcode = this->ideal_Opcode();
20045 int vlen = Matcher::vector_length(this, $src2);
20046 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20047 %}
20048 ins_pipe( pipe_slow );
20049 %}
20050
20051 // =======================Mul Reduction==========================================
20052
20053 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
20054 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20055 Matcher::vector_length(n->in(2)) <= 32); // src2
20056 match(Set dst (MulReductionVI src1 src2));
20057 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_mul_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20059 ins_encode %{
20060 int opcode = this->ideal_Opcode();
20061 int vlen = Matcher::vector_length(this, $src2);
20062 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20063 %}
20064 ins_pipe( pipe_slow );
20065 %}
20066
20067 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
20068 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
20069 Matcher::vector_length(n->in(2)) == 64); // src2
20070 match(Set dst (MulReductionVI src1 src2));
20071 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
format %{ "vector_mul_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
20073 ins_encode %{
20074 int opcode = this->ideal_Opcode();
20075 int vlen = Matcher::vector_length(this, $src2);
20076 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
20077 %}
20078 ins_pipe( pipe_slow );
20079 %}
20080
20081 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
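// src1 is an immediate holding the reduction identity (+Infinity for min,
// -Infinity for max); the predicates below check this, so the reduction can
// work on src2 alone and ignore the scalar input.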
20083 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20084 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20085 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20086 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20087 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20088 Matcher::vector_length(n->in(2)) == 2);
20089 match(Set dst (MinReductionV src1 src2));
20090 match(Set dst (MaxReductionV src1 src2));
20091 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20092 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20093 ins_encode %{
20094 assert(UseAVX > 0, "sanity");
20095
20096 int opcode = this->ideal_Opcode();
20097 int vlen = Matcher::vector_length(this, $src2);
20098 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20099 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20100 %}
20101 ins_pipe( pipe_slow );
20102 %}
20103
20104 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
20105 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20106 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20107 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20108 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20109 Matcher::vector_length(n->in(2)) >= 4);
20110 match(Set dst (MinReductionV src1 src2));
20111 match(Set dst (MaxReductionV src1 src2));
20112 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20113 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20114 ins_encode %{
20115 assert(UseAVX > 0, "sanity");
20116
20117 int opcode = this->ideal_Opcode();
20118 int vlen = Matcher::vector_length(this, $src2);
20119 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
20120 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20121 %}
20122 ins_pipe( pipe_slow );
20123 %}
20124
20125 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
20126 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
20127 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20128 Matcher::vector_length(n->in(2)) == 2);
20129 match(Set dst (MinReductionV dst src));
20130 match(Set dst (MaxReductionV dst src));
20131 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
20132 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
20133 ins_encode %{
20134 assert(UseAVX > 0, "sanity");
20135
20136 int opcode = this->ideal_Opcode();
20137 int vlen = Matcher::vector_length(this, $src);
20138 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20139 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
20140 %}
20141 ins_pipe( pipe_slow );
20142 %}
20143
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
20146 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
20147 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20148 Matcher::vector_length(n->in(2)) >= 4);
20149 match(Set dst (MinReductionV dst src));
20150 match(Set dst (MaxReductionV dst src));
20151 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
20152 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
20153 ins_encode %{
20154 assert(UseAVX > 0, "sanity");
20155
20156 int opcode = this->ideal_Opcode();
20157 int vlen = Matcher::vector_length(this, $src);
20158 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
20159 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
20160 %}
20161 ins_pipe( pipe_slow );
20162 %}
20163
20164 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
20165 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20166 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20167 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20168 Matcher::vector_length(n->in(2)) == 2);
20169 match(Set dst (MinReductionV src1 src2));
20170 match(Set dst (MaxReductionV src1 src2));
20171 effect(TEMP dst, TEMP xtmp1);
20172 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
20173 ins_encode %{
20174 int opcode = this->ideal_Opcode();
20175 int vlen = Matcher::vector_length(this, $src2);
20176 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20177 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20178 %}
20179 ins_pipe( pipe_slow );
20180 %}
20181
20182 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
20183 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20184 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
20185 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
20186 Matcher::vector_length(n->in(2)) >= 4);
20187 match(Set dst (MinReductionV src1 src2));
20188 match(Set dst (MaxReductionV src1 src2));
20189 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20190 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
20191 ins_encode %{
20192 int opcode = this->ideal_Opcode();
20193 int vlen = Matcher::vector_length(this, $src2);
20194 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20195 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20196 %}
20197 ins_pipe( pipe_slow );
20198 %}
20199
20200 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
20201 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20202 Matcher::vector_length(n->in(2)) == 2);
20203 match(Set dst (MinReductionV dst src));
20204 match(Set dst (MaxReductionV dst src));
20205 effect(TEMP dst, TEMP xtmp1);
20206 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
20207 ins_encode %{
20208 int opcode = this->ideal_Opcode();
20209 int vlen = Matcher::vector_length(this, $src);
20210 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20211 $xtmp1$$XMMRegister);
20212 %}
20213 ins_pipe( pipe_slow );
20214 %}
20215
20216 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20217 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20218 Matcher::vector_length(n->in(2)) >= 4);
20219 match(Set dst (MinReductionV dst src));
20220 match(Set dst (MaxReductionV dst src));
20221 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20223 ins_encode %{
20224 int opcode = this->ideal_Opcode();
20225 int vlen = Matcher::vector_length(this, $src);
20226 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20227 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20228 %}
20229 ins_pipe( pipe_slow );
20230 %}
20231
//--------------------Min/Max Double Reduction --------------------
20233 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20234 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20235 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20236 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20237 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20238 Matcher::vector_length(n->in(2)) == 2);
20239 match(Set dst (MinReductionV src1 src2));
20240 match(Set dst (MaxReductionV src1 src2));
20241 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20242 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20243 ins_encode %{
20244 assert(UseAVX > 0, "sanity");
20245
20246 int opcode = this->ideal_Opcode();
20247 int vlen = Matcher::vector_length(this, $src2);
20248 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20249 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20250 %}
20251 ins_pipe( pipe_slow );
20252 %}
20253
20254 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
20255 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20256 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20257 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20258 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20259 Matcher::vector_length(n->in(2)) >= 4);
20260 match(Set dst (MinReductionV src1 src2));
20261 match(Set dst (MaxReductionV src1 src2));
20262 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20263 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20264 ins_encode %{
20265 assert(UseAVX > 0, "sanity");
20266
20267 int opcode = this->ideal_Opcode();
20268 int vlen = Matcher::vector_length(this, $src2);
20269 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
20270 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20271 %}
20272 ins_pipe( pipe_slow );
20273 %}
20274
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
20277 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
20278 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20279 Matcher::vector_length(n->in(2)) == 2);
20280 match(Set dst (MinReductionV dst src));
20281 match(Set dst (MaxReductionV dst src));
20282 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
20283 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
20284 ins_encode %{
20285 assert(UseAVX > 0, "sanity");
20286
20287 int opcode = this->ideal_Opcode();
20288 int vlen = Matcher::vector_length(this, $src);
20289 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20290 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
20291 %}
20292 ins_pipe( pipe_slow );
20293 %}
20294
20295 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
20296 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
20297 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20298 Matcher::vector_length(n->in(2)) >= 4);
20299 match(Set dst (MinReductionV dst src));
20300 match(Set dst (MaxReductionV dst src));
20301 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
20302 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
20303 ins_encode %{
20304 assert(UseAVX > 0, "sanity");
20305
20306 int opcode = this->ideal_Opcode();
20307 int vlen = Matcher::vector_length(this, $src);
20308 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20309 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
20310 %}
20311 ins_pipe( pipe_slow );
20312 %}
20313
20314 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20315 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20316 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20317 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20318 Matcher::vector_length(n->in(2)) == 2);
20319 match(Set dst (MinReductionV src1 src2));
20320 match(Set dst (MaxReductionV src1 src2));
20321 effect(TEMP dst, TEMP xtmp1);
20322 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20323 ins_encode %{
20324 int opcode = this->ideal_Opcode();
20325 int vlen = Matcher::vector_length(this, $src2);
20326 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20327 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20328 %}
20329 ins_pipe( pipe_slow );
20330 %}
20331
20332 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20333 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20334 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20335 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20336 Matcher::vector_length(n->in(2)) >= 4);
20337 match(Set dst (MinReductionV src1 src2));
20338 match(Set dst (MaxReductionV src1 src2));
20339 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20340 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20341 ins_encode %{
20342 int opcode = this->ideal_Opcode();
20343 int vlen = Matcher::vector_length(this, $src2);
20344 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20345 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20346 %}
20347 ins_pipe( pipe_slow );
20348 %}
20349
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20352 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20353 Matcher::vector_length(n->in(2)) == 2);
20354 match(Set dst (MinReductionV dst src));
20355 match(Set dst (MaxReductionV dst src));
20356 effect(TEMP dst, TEMP xtmp1);
20357 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20358 ins_encode %{
20359 int opcode = this->ideal_Opcode();
20360 int vlen = Matcher::vector_length(this, $src);
20361 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20362 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20363 %}
20364 ins_pipe( pipe_slow );
20365 %}
20366
20367 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20368 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20369 Matcher::vector_length(n->in(2)) >= 4);
20370 match(Set dst (MinReductionV dst src));
20371 match(Set dst (MaxReductionV dst src));
20372 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20374 ins_encode %{
20375 int opcode = this->ideal_Opcode();
20376 int vlen = Matcher::vector_length(this, $src);
20377 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20378 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20379 %}
20380 ins_pipe( pipe_slow );
20381 %}
20382
20383 // ====================VECTOR ARITHMETIC=======================================
20384
20385 // --------------------------------- ADD --------------------------------------
20386
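// The arithmetic rules below follow a common pattern: a two-operand SSE
// rule that updates dst in place (UseAVX == 0), a three-operand AVX register
// rule, and an AVX rule that folds the second operand from memory. The
// memory form is restricted to vectors wider than 8 bytes, presumably so
// the memory operand always corresponds to a full-width vector load.
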
20387 // Bytes vector add
20388 instruct vaddB(vec dst, vec src) %{
20389 predicate(UseAVX == 0);
20390 match(Set dst (AddVB dst src));
20391 format %{ "paddb $dst,$src\t! add packedB" %}
20392 ins_encode %{
20393 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20394 %}
20395 ins_pipe( pipe_slow );
20396 %}
20397
20398 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20399 predicate(UseAVX > 0);
20400 match(Set dst (AddVB src1 src2));
20401 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20402 ins_encode %{
20403 int vlen_enc = vector_length_encoding(this);
20404 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20405 %}
20406 ins_pipe( pipe_slow );
20407 %}
20408
20409 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20410 predicate((UseAVX > 0) &&
20411 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20412 match(Set dst (AddVB src (LoadVector mem)));
20413 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20414 ins_encode %{
20415 int vlen_enc = vector_length_encoding(this);
20416 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20417 %}
20418 ins_pipe( pipe_slow );
20419 %}
20420
20421 // Shorts/Chars vector add
20422 instruct vaddS(vec dst, vec src) %{
20423 predicate(UseAVX == 0);
20424 match(Set dst (AddVS dst src));
20425 format %{ "paddw $dst,$src\t! add packedS" %}
20426 ins_encode %{
20427 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20428 %}
20429 ins_pipe( pipe_slow );
20430 %}
20431
20432 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20433 predicate(UseAVX > 0);
20434 match(Set dst (AddVS src1 src2));
20435 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20436 ins_encode %{
20437 int vlen_enc = vector_length_encoding(this);
20438 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20439 %}
20440 ins_pipe( pipe_slow );
20441 %}
20442
20443 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20444 predicate((UseAVX > 0) &&
20445 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20446 match(Set dst (AddVS src (LoadVector mem)));
20447 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20448 ins_encode %{
20449 int vlen_enc = vector_length_encoding(this);
20450 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20451 %}
20452 ins_pipe( pipe_slow );
20453 %}
20454
20455 // Integers vector add
20456 instruct vaddI(vec dst, vec src) %{
20457 predicate(UseAVX == 0);
20458 match(Set dst (AddVI dst src));
20459 format %{ "paddd $dst,$src\t! add packedI" %}
20460 ins_encode %{
20461 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20462 %}
20463 ins_pipe( pipe_slow );
20464 %}
20465
20466 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20467 predicate(UseAVX > 0);
20468 match(Set dst (AddVI src1 src2));
20469 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20470 ins_encode %{
20471 int vlen_enc = vector_length_encoding(this);
20472 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20473 %}
20474 ins_pipe( pipe_slow );
20475 %}
20476
instruct vaddI_mem(vec dst, vec src, memory mem) %{
20479 predicate((UseAVX > 0) &&
20480 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20481 match(Set dst (AddVI src (LoadVector mem)));
20482 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20483 ins_encode %{
20484 int vlen_enc = vector_length_encoding(this);
20485 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20486 %}
20487 ins_pipe( pipe_slow );
20488 %}
20489
20490 // Longs vector add
20491 instruct vaddL(vec dst, vec src) %{
20492 predicate(UseAVX == 0);
20493 match(Set dst (AddVL dst src));
20494 format %{ "paddq $dst,$src\t! add packedL" %}
20495 ins_encode %{
20496 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20497 %}
20498 ins_pipe( pipe_slow );
20499 %}
20500
20501 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20502 predicate(UseAVX > 0);
20503 match(Set dst (AddVL src1 src2));
20504 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20505 ins_encode %{
20506 int vlen_enc = vector_length_encoding(this);
20507 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20508 %}
20509 ins_pipe( pipe_slow );
20510 %}
20511
20512 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20513 predicate((UseAVX > 0) &&
20514 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20515 match(Set dst (AddVL src (LoadVector mem)));
20516 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20517 ins_encode %{
20518 int vlen_enc = vector_length_encoding(this);
20519 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20520 %}
20521 ins_pipe( pipe_slow );
20522 %}
20523
20524 // Floats vector add
20525 instruct vaddF(vec dst, vec src) %{
20526 predicate(UseAVX == 0);
20527 match(Set dst (AddVF dst src));
20528 format %{ "addps $dst,$src\t! add packedF" %}
20529 ins_encode %{
20530 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20531 %}
20532 ins_pipe( pipe_slow );
20533 %}
20534
20535 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20536 predicate(UseAVX > 0);
20537 match(Set dst (AddVF src1 src2));
20538 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20539 ins_encode %{
20540 int vlen_enc = vector_length_encoding(this);
20541 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20542 %}
20543 ins_pipe( pipe_slow );
20544 %}
20545
20546 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20547 predicate((UseAVX > 0) &&
20548 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20549 match(Set dst (AddVF src (LoadVector mem)));
20550 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20551 ins_encode %{
20552 int vlen_enc = vector_length_encoding(this);
20553 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20554 %}
20555 ins_pipe( pipe_slow );
20556 %}
20557
20558 // Doubles vector add
20559 instruct vaddD(vec dst, vec src) %{
20560 predicate(UseAVX == 0);
20561 match(Set dst (AddVD dst src));
20562 format %{ "addpd $dst,$src\t! add packedD" %}
20563 ins_encode %{
20564 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20565 %}
20566 ins_pipe( pipe_slow );
20567 %}
20568
20569 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20570 predicate(UseAVX > 0);
20571 match(Set dst (AddVD src1 src2));
20572 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20573 ins_encode %{
20574 int vlen_enc = vector_length_encoding(this);
20575 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20576 %}
20577 ins_pipe( pipe_slow );
20578 %}
20579
20580 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20581 predicate((UseAVX > 0) &&
20582 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20583 match(Set dst (AddVD src (LoadVector mem)));
20584 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20585 ins_encode %{
20586 int vlen_enc = vector_length_encoding(this);
20587 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 // --------------------------------- SUB --------------------------------------
20593
20594 // Bytes vector sub
20595 instruct vsubB(vec dst, vec src) %{
20596 predicate(UseAVX == 0);
20597 match(Set dst (SubVB dst src));
20598 format %{ "psubb $dst,$src\t! sub packedB" %}
20599 ins_encode %{
20600 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20601 %}
20602 ins_pipe( pipe_slow );
20603 %}
20604
20605 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20606 predicate(UseAVX > 0);
20607 match(Set dst (SubVB src1 src2));
20608 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20609 ins_encode %{
20610 int vlen_enc = vector_length_encoding(this);
20611 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20612 %}
20613 ins_pipe( pipe_slow );
20614 %}
20615
20616 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20617 predicate((UseAVX > 0) &&
20618 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20619 match(Set dst (SubVB src (LoadVector mem)));
20620 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20621 ins_encode %{
20622 int vlen_enc = vector_length_encoding(this);
20623 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20624 %}
20625 ins_pipe( pipe_slow );
20626 %}
20627
20628 // Shorts/Chars vector sub
20629 instruct vsubS(vec dst, vec src) %{
20630 predicate(UseAVX == 0);
20631 match(Set dst (SubVS dst src));
20632 format %{ "psubw $dst,$src\t! sub packedS" %}
20633 ins_encode %{
20634 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20635 %}
20636 ins_pipe( pipe_slow );
20637 %}
20638
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20641 predicate(UseAVX > 0);
20642 match(Set dst (SubVS src1 src2));
20643 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20644 ins_encode %{
20645 int vlen_enc = vector_length_encoding(this);
20646 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20647 %}
20648 ins_pipe( pipe_slow );
20649 %}
20650
20651 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20652 predicate((UseAVX > 0) &&
20653 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20654 match(Set dst (SubVS src (LoadVector mem)));
20655 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20656 ins_encode %{
20657 int vlen_enc = vector_length_encoding(this);
20658 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20659 %}
20660 ins_pipe( pipe_slow );
20661 %}
20662
20663 // Integers vector sub
20664 instruct vsubI(vec dst, vec src) %{
20665 predicate(UseAVX == 0);
20666 match(Set dst (SubVI dst src));
20667 format %{ "psubd $dst,$src\t! sub packedI" %}
20668 ins_encode %{
20669 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20670 %}
20671 ins_pipe( pipe_slow );
20672 %}
20673
20674 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20675 predicate(UseAVX > 0);
20676 match(Set dst (SubVI src1 src2));
20677 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20678 ins_encode %{
20679 int vlen_enc = vector_length_encoding(this);
20680 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20681 %}
20682 ins_pipe( pipe_slow );
20683 %}
20684
20685 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20686 predicate((UseAVX > 0) &&
20687 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20688 match(Set dst (SubVI src (LoadVector mem)));
20689 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20690 ins_encode %{
20691 int vlen_enc = vector_length_encoding(this);
20692 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20693 %}
20694 ins_pipe( pipe_slow );
20695 %}
20696
20697 // Longs vector sub
20698 instruct vsubL(vec dst, vec src) %{
20699 predicate(UseAVX == 0);
20700 match(Set dst (SubVL dst src));
20701 format %{ "psubq $dst,$src\t! sub packedL" %}
20702 ins_encode %{
20703 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20704 %}
20705 ins_pipe( pipe_slow );
20706 %}
20707
20708 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20709 predicate(UseAVX > 0);
20710 match(Set dst (SubVL src1 src2));
20711 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20712 ins_encode %{
20713 int vlen_enc = vector_length_encoding(this);
20714 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20715 %}
20716 ins_pipe( pipe_slow );
20717 %}
20718
instruct vsubL_mem(vec dst, vec src, memory mem) %{
20721 predicate((UseAVX > 0) &&
20722 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20723 match(Set dst (SubVL src (LoadVector mem)));
20724 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20725 ins_encode %{
20726 int vlen_enc = vector_length_encoding(this);
20727 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20728 %}
20729 ins_pipe( pipe_slow );
20730 %}
20731
20732 // Floats vector sub
20733 instruct vsubF(vec dst, vec src) %{
20734 predicate(UseAVX == 0);
20735 match(Set dst (SubVF dst src));
20736 format %{ "subps $dst,$src\t! sub packedF" %}
20737 ins_encode %{
20738 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20739 %}
20740 ins_pipe( pipe_slow );
20741 %}
20742
20743 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20744 predicate(UseAVX > 0);
20745 match(Set dst (SubVF src1 src2));
20746 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20747 ins_encode %{
20748 int vlen_enc = vector_length_encoding(this);
20749 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20750 %}
20751 ins_pipe( pipe_slow );
20752 %}
20753
20754 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20755 predicate((UseAVX > 0) &&
20756 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20757 match(Set dst (SubVF src (LoadVector mem)));
20758 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20759 ins_encode %{
20760 int vlen_enc = vector_length_encoding(this);
20761 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20762 %}
20763 ins_pipe( pipe_slow );
20764 %}
20765
20766 // Doubles vector sub
20767 instruct vsubD(vec dst, vec src) %{
20768 predicate(UseAVX == 0);
20769 match(Set dst (SubVD dst src));
20770 format %{ "subpd $dst,$src\t! sub packedD" %}
20771 ins_encode %{
20772 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20773 %}
20774 ins_pipe( pipe_slow );
20775 %}
20776
20777 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20778 predicate(UseAVX > 0);
20779 match(Set dst (SubVD src1 src2));
20780 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20781 ins_encode %{
20782 int vlen_enc = vector_length_encoding(this);
20783 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20784 %}
20785 ins_pipe( pipe_slow );
20786 %}
20787
20788 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20789 predicate((UseAVX > 0) &&
20790 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20791 match(Set dst (SubVD src (LoadVector mem)));
20792 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20793 ins_encode %{
20794 int vlen_enc = vector_length_encoding(this);
20795 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20796 %}
20797 ins_pipe( pipe_slow );
20798 %}
20799
20800 // --------------------------------- MUL --------------------------------------
20801
20802 // Byte vector mul
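// SSE/AVX have no byte-wise multiply, so the rules below widen bytes to
// 16-bit lanes (sign extension for <= 8 elements, odd/even lane splitting
// otherwise), multiply with pmullw/vpmullw, and truncate each product back
// to its low byte.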
20803 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20804 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20805 match(Set dst (MulVB src1 src2));
20806 effect(TEMP dst, TEMP xtmp);
20807 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20808 ins_encode %{
20809 assert(UseSSE > 3, "required");
20810 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20811 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20812 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20813 __ psllw($dst$$XMMRegister, 8);
20814 __ psrlw($dst$$XMMRegister, 8);
20815 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20816 %}
20817 ins_pipe( pipe_slow );
20818 %}
20819
20820 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20821 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20822 match(Set dst (MulVB src1 src2));
20823 effect(TEMP dst, TEMP xtmp);
20824 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20825 ins_encode %{
20826 assert(UseSSE > 3, "required");
20827 // Odd-index elements
20828 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20829 __ psrlw($dst$$XMMRegister, 8);
20830 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20831 __ psrlw($xtmp$$XMMRegister, 8);
20832 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20833 __ psllw($dst$$XMMRegister, 8);
20834 // Even-index elements
20835 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20836 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20837 __ psllw($xtmp$$XMMRegister, 8);
20838 __ psrlw($xtmp$$XMMRegister, 8);
20839 // Combine
20840 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20841 %}
20842 ins_pipe( pipe_slow );
20843 %}
20844
20845 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20846 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20847 match(Set dst (MulVB src1 src2));
20848 effect(TEMP xtmp1, TEMP xtmp2);
20849 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20850 ins_encode %{
20851 int vlen_enc = vector_length_encoding(this);
20852 // Odd-index elements
20853 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20854 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20855 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20856 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20857 // Even-index elements
20858 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20859 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20860 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20861 // Combine
20862 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20863 %}
20864 ins_pipe( pipe_slow );
20865 %}
20866
20867 // Shorts/Chars vector mul
20868 instruct vmulS(vec dst, vec src) %{
20869 predicate(UseAVX == 0);
20870 match(Set dst (MulVS dst src));
20871 format %{ "pmullw $dst,$src\t! mul packedS" %}
20872 ins_encode %{
20873 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20874 %}
20875 ins_pipe( pipe_slow );
20876 %}
20877
20878 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20879 predicate(UseAVX > 0);
20880 match(Set dst (MulVS src1 src2));
20881 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20882 ins_encode %{
20883 int vlen_enc = vector_length_encoding(this);
20884 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20885 %}
20886 ins_pipe( pipe_slow );
20887 %}
20888
20889 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20890 predicate((UseAVX > 0) &&
20891 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20892 match(Set dst (MulVS src (LoadVector mem)));
20893 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20894 ins_encode %{
20895 int vlen_enc = vector_length_encoding(this);
20896 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20897 %}
20898 ins_pipe( pipe_slow );
20899 %}
20900
20901 // Integers vector mul
20902 instruct vmulI(vec dst, vec src) %{
20903 predicate(UseAVX == 0);
20904 match(Set dst (MulVI dst src));
20905 format %{ "pmulld $dst,$src\t! mul packedI" %}
20906 ins_encode %{
20907 assert(UseSSE > 3, "required");
20908 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20909 %}
20910 ins_pipe( pipe_slow );
20911 %}
20912
20913 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20914 predicate(UseAVX > 0);
20915 match(Set dst (MulVI src1 src2));
20916 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20917 ins_encode %{
20918 int vlen_enc = vector_length_encoding(this);
20919 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20920 %}
20921 ins_pipe( pipe_slow );
20922 %}
20923
20924 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20925 predicate((UseAVX > 0) &&
20926 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20927 match(Set dst (MulVI src (LoadVector mem)));
20928 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20929 ins_encode %{
20930 int vlen_enc = vector_length_encoding(this);
20931 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20932 %}
20933 ins_pipe( pipe_slow );
20934 %}
20935
20936 // Longs vector mul
20937 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20938 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20939 VM_Version::supports_avx512dq()) ||
20940 VM_Version::supports_avx512vldq());
20941 match(Set dst (MulVL src1 src2));
20942 ins_cost(500);
20943 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20944 ins_encode %{
20945 assert(UseAVX > 2, "required");
20946 int vlen_enc = vector_length_encoding(this);
20947 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20948 %}
20949 ins_pipe( pipe_slow );
20950 %}
20951
20952 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20953 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20954 VM_Version::supports_avx512dq()) ||
20955 (Matcher::vector_length_in_bytes(n) > 8 &&
20956 VM_Version::supports_avx512vldq()));
20957 match(Set dst (MulVL src (LoadVector mem)));
20958 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20959 ins_cost(500);
20960 ins_encode %{
20961 assert(UseAVX > 2, "required");
20962 int vlen_enc = vector_length_encoding(this);
20963 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20964 %}
20965 ins_pipe( pipe_slow );
20966 %}
20967
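// Without the AVX-512 DQ/VLDQ forms above there is no 64-bit element
// multiply; it is synthesized from 32-bit multiplies. Splitting each lane
// into 32-bit halves, a = aH:aL and b = bH:bL:
//   a*b mod 2^64 = aL*bL + ((aL*bH + aH*bL) << 32)
// The 0xB1 shuffle swaps the 32-bit halves within each lane so a single
// pmulld/vpmulld produces both cross products, and pmuludq supplies the
// full 64-bit aL*bL term.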
20968 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20969 predicate(UseAVX == 0);
20970 match(Set dst (MulVL src1 src2));
20971 ins_cost(500);
20972 effect(TEMP dst, TEMP xtmp);
20973 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20974 ins_encode %{
20975 assert(VM_Version::supports_sse4_1(), "required");
// Get the lo-hi cross products; only the lower 32 bits of each are of concern
20977 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20978 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20979 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20980 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20981 __ psllq($dst$$XMMRegister, 32);
20982 // Get the lo-lo products
20983 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20984 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20985 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20986 %}
20987 ins_pipe( pipe_slow );
20988 %}
20989
20990 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20991 predicate(UseAVX > 0 &&
20992 ((Matcher::vector_length_in_bytes(n) == 64 &&
20993 !VM_Version::supports_avx512dq()) ||
20994 (Matcher::vector_length_in_bytes(n) < 64 &&
20995 !VM_Version::supports_avx512vldq())));
20996 match(Set dst (MulVL src1 src2));
20997 effect(TEMP xtmp1, TEMP xtmp2);
20998 ins_cost(500);
20999 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
21000 ins_encode %{
21001 int vlen_enc = vector_length_encoding(this);
// Get the lo-hi cross products; only the lower 32 bits of each are of concern
21003 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
21004 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21005 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
21006 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
21007 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
21008 // Get the lo-lo products
21009 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21010 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21011 %}
21012 ins_pipe( pipe_slow );
21013 %}
21014
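// When C2 can prove both long inputs are really zero-extended
// (has_uint_inputs) or sign-extended (has_int_inputs) 32-bit values, a
// single 32x32->64 multiply (vpmuludq/vpmuldq) suffices, hence the much
// lower ins_cost than the synthesized sequences above.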
21015 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
21016 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
21017 match(Set dst (MulVL src1 src2));
21018 ins_cost(100);
21019 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
21020 ins_encode %{
21021 int vlen_enc = vector_length_encoding(this);
21022 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21023 %}
21024 ins_pipe( pipe_slow );
21025 %}
21026
21027 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
21028 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
21029 match(Set dst (MulVL src1 src2));
21030 ins_cost(100);
21031 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
21032 ins_encode %{
21033 int vlen_enc = vector_length_encoding(this);
21034 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21035 %}
21036 ins_pipe( pipe_slow );
21037 %}
21038
21039 // Floats vector mul
21040 instruct vmulF(vec dst, vec src) %{
21041 predicate(UseAVX == 0);
21042 match(Set dst (MulVF dst src));
21043 format %{ "mulps $dst,$src\t! mul packedF" %}
21044 ins_encode %{
21045 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
21046 %}
21047 ins_pipe( pipe_slow );
21048 %}
21049
21050 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
21051 predicate(UseAVX > 0);
21052 match(Set dst (MulVF src1 src2));
21053 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
21054 ins_encode %{
21055 int vlen_enc = vector_length_encoding(this);
21056 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21057 %}
21058 ins_pipe( pipe_slow );
21059 %}
21060
21061 instruct vmulF_mem(vec dst, vec src, memory mem) %{
21062 predicate((UseAVX > 0) &&
21063 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21064 match(Set dst (MulVF src (LoadVector mem)));
21065 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
21066 ins_encode %{
21067 int vlen_enc = vector_length_encoding(this);
21068 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21069 %}
21070 ins_pipe( pipe_slow );
21071 %}
21072
21073 // Doubles vector mul
21074 instruct vmulD(vec dst, vec src) %{
21075 predicate(UseAVX == 0);
21076 match(Set dst (MulVD dst src));
21077 format %{ "mulpd $dst,$src\t! mul packedD" %}
21078 ins_encode %{
21079 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
21080 %}
21081 ins_pipe( pipe_slow );
21082 %}
21083
21084 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
21085 predicate(UseAVX > 0);
21086 match(Set dst (MulVD src1 src2));
21087 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
21088 ins_encode %{
21089 int vlen_enc = vector_length_encoding(this);
21090 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21091 %}
21092 ins_pipe( pipe_slow );
21093 %}
21094
21095 instruct vmulD_mem(vec dst, vec src, memory mem) %{
21096 predicate((UseAVX > 0) &&
21097 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21098 match(Set dst (MulVD src (LoadVector mem)));
21099 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
21100 ins_encode %{
21101 int vlen_enc = vector_length_encoding(this);
21102 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21103 %}
21104 ins_pipe( pipe_slow );
21105 %}
21106
21107 // --------------------------------- DIV --------------------------------------
21108
21109 // Floats vector div
21110 instruct vdivF(vec dst, vec src) %{
21111 predicate(UseAVX == 0);
21112 match(Set dst (DivVF dst src));
21113 format %{ "divps $dst,$src\t! div packedF" %}
21114 ins_encode %{
21115 __ divps($dst$$XMMRegister, $src$$XMMRegister);
21116 %}
21117 ins_pipe( pipe_slow );
21118 %}
21119
21120 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
21121 predicate(UseAVX > 0);
21122 match(Set dst (DivVF src1 src2));
21123 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
21124 ins_encode %{
21125 int vlen_enc = vector_length_encoding(this);
21126 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21127 %}
21128 ins_pipe( pipe_slow );
21129 %}
21130
21131 instruct vdivF_mem(vec dst, vec src, memory mem) %{
21132 predicate((UseAVX > 0) &&
21133 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21134 match(Set dst (DivVF src (LoadVector mem)));
21135 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
21136 ins_encode %{
21137 int vlen_enc = vector_length_encoding(this);
21138 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21139 %}
21140 ins_pipe( pipe_slow );
21141 %}
21142
21143 // Doubles vector div
21144 instruct vdivD(vec dst, vec src) %{
21145 predicate(UseAVX == 0);
21146 match(Set dst (DivVD dst src));
21147 format %{ "divpd $dst,$src\t! div packedD" %}
21148 ins_encode %{
21149 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
21150 %}
21151 ins_pipe( pipe_slow );
21152 %}
21153
21154 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
21155 predicate(UseAVX > 0);
21156 match(Set dst (DivVD src1 src2));
21157 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
21158 ins_encode %{
21159 int vlen_enc = vector_length_encoding(this);
21160 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21161 %}
21162 ins_pipe( pipe_slow );
21163 %}
21164
21165 instruct vdivD_mem(vec dst, vec src, memory mem) %{
21166 predicate((UseAVX > 0) &&
21167 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21168 match(Set dst (DivVD src (LoadVector mem)));
21169 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
21170 ins_encode %{
21171 int vlen_enc = vector_length_encoding(this);
21172 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21173 %}
21174 ins_pipe( pipe_slow );
21175 %}
21176
21177 // ------------------------------ MinMax ---------------------------------------
21178
21179 // Byte, Short, Int vector Min/Max
21180 instruct minmax_reg_sse(vec dst, vec src) %{
21181 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21182 UseAVX == 0);
21183 match(Set dst (MinV dst src));
21184 match(Set dst (MaxV dst src));
21185 format %{ "vector_minmax $dst,$src\t! " %}
21186 ins_encode %{
21187 assert(UseSSE >= 4, "required");
21188
21189 int opcode = this->ideal_Opcode();
21190 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21191 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
21192 %}
21193 ins_pipe( pipe_slow );
21194 %}
21195
21196 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
21197 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
21198 UseAVX > 0);
21199 match(Set dst (MinV src1 src2));
21200 match(Set dst (MaxV src1 src2));
21201 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
21202 ins_encode %{
21203 int opcode = this->ideal_Opcode();
21204 int vlen_enc = vector_length_encoding(this);
21205 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21206
21207 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21208 %}
21209 ins_pipe( pipe_slow );
21210 %}
21211
21212 // Long vector Min/Max
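// The SSE4.1 blend (blendv) used to select between the compared long lanes
// takes its mask implicitly from xmm0, which is why the rule below pins its
// temp to rxmm0.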
21213 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
21214 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
21215 UseAVX == 0);
21216 match(Set dst (MinV dst src));
21217 match(Set dst (MaxV src dst));
21218 effect(TEMP dst, TEMP tmp);
21219 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
21220 ins_encode %{
21221 assert(UseSSE >= 4, "required");
21222
21223 int opcode = this->ideal_Opcode();
21224 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21225 assert(elem_bt == T_LONG, "sanity");
21226
21227 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
21228 %}
21229 ins_pipe( pipe_slow );
21230 %}
21231
21232 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
21233 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
21234 UseAVX > 0 && !VM_Version::supports_avx512vl());
21235 match(Set dst (MinV src1 src2));
21236 match(Set dst (MaxV src1 src2));
21237 effect(TEMP dst);
21238 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21239 ins_encode %{
21240 int vlen_enc = vector_length_encoding(this);
21241 int opcode = this->ideal_Opcode();
21242 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21243 assert(elem_bt == T_LONG, "sanity");
21244
21245 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21246 %}
21247 ins_pipe( pipe_slow );
21248 %}
21249
21250 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
21251 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
21252 Matcher::vector_element_basic_type(n) == T_LONG);
21253 match(Set dst (MinV src1 src2));
21254 match(Set dst (MaxV src1 src2));
format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
21256 ins_encode %{
21257 assert(UseAVX > 2, "required");
21258
21259 int vlen_enc = vector_length_encoding(this);
21260 int opcode = this->ideal_Opcode();
21261 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21262 assert(elem_bt == T_LONG, "sanity");
21263
21264 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21265 %}
21266 ins_pipe( pipe_slow );
21267 %}
21268
21269 // Float/Double vector Min/Max
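// Java min/max semantics differ from raw vminps/vmaxps: NaN must propagate
// through either input and -0.0 compares smaller than +0.0. Pre-AVX10.2
// this takes extra compare/blend temporaries; AVX10.2 supplies min/max
// forms with the required semantics directly, so those rules need no temps.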
21270 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
21271 predicate(VM_Version::supports_avx10_2() &&
21272 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21273 match(Set dst (MinV a b));
21274 match(Set dst (MaxV a b));
21275 format %{ "vector_minmaxFP $dst, $a, $b" %}
21276 ins_encode %{
21277 int vlen_enc = vector_length_encoding(this);
21278 int opcode = this->ideal_Opcode();
21279 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21280 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21281 %}
21282 ins_pipe( pipe_slow );
21283 %}
21284
21285 // Float/Double vector Min/Max
21286 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
21287 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
21288 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
21289 UseAVX > 0);
21290 match(Set dst (MinV a b));
21291 match(Set dst (MaxV a b));
21292 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
21293 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
21294 ins_encode %{
21295 assert(UseAVX > 0, "required");
21296
21297 int opcode = this->ideal_Opcode();
21298 int vlen_enc = vector_length_encoding(this);
21299 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21300
21301 __ vminmax_fp(opcode, elem_bt,
21302 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
$tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21304 %}
21305 ins_pipe( pipe_slow );
21306 %}
21307
21308 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
21309 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
21310 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
21311 match(Set dst (MinV a b));
21312 match(Set dst (MaxV a b));
21313 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
21314 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21315 ins_encode %{
21316 assert(UseAVX > 2, "required");
21317
21318 int opcode = this->ideal_Opcode();
21319 int vlen_enc = vector_length_encoding(this);
21320 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21321
21322 __ evminmax_fp(opcode, elem_bt,
21323 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
$ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21325 %}
21326 ins_pipe( pipe_slow );
21327 %}
21328
21329 // ------------------------------ Unsigned vector Min/Max ----------------------
21330
21331 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21332 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21333 match(Set dst (UMinV a b));
21334 match(Set dst (UMaxV a b));
21335 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21336 ins_encode %{
21337 int opcode = this->ideal_Opcode();
21338 int vlen_enc = vector_length_encoding(this);
21339 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21340 assert(is_integral_type(elem_bt), "");
21341 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21342 %}
21343 ins_pipe( pipe_slow );
21344 %}
21345
21346 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21347 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21348 match(Set dst (UMinV a (LoadVector b)));
21349 match(Set dst (UMaxV a (LoadVector b)));
21350 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21351 ins_encode %{
21352 int opcode = this->ideal_Opcode();
21353 int vlen_enc = vector_length_encoding(this);
21354 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21355 assert(is_integral_type(elem_bt), "");
21356 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21357 %}
21358 ins_pipe( pipe_slow );
21359 %}
21360
21361 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21362 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21363 match(Set dst (UMinV a b));
21364 match(Set dst (UMaxV a b));
21365 effect(TEMP xtmp1, TEMP xtmp2);
format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21367 ins_encode %{
21368 int opcode = this->ideal_Opcode();
21369 int vlen_enc = vector_length_encoding(this);
21370 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21371 %}
21372 ins_pipe( pipe_slow );
21373 %}
21374
21375 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21376 match(Set dst (UMinV (Binary dst src2) mask));
21377 match(Set dst (UMaxV (Binary dst src2) mask));
21378 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21379 ins_encode %{
21380 int vlen_enc = vector_length_encoding(this);
21381 BasicType bt = Matcher::vector_element_basic_type(this);
21382 int opc = this->ideal_Opcode();
21383 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21384 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21385 %}
21386 ins_pipe( pipe_slow );
21387 %}
21388
21389 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21390 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21391 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21392 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21393 ins_encode %{
21394 int vlen_enc = vector_length_encoding(this);
21395 BasicType bt = Matcher::vector_element_basic_type(this);
21396 int opc = this->ideal_Opcode();
21397 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21398 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21399 %}
21400 ins_pipe( pipe_slow );
21401 %}
21402
21403 // --------------------------------- Signum/CopySign ---------------------------
21404
21405 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21406 match(Set dst (SignumF dst (Binary zero one)));
21407 effect(KILL cr);
21408 format %{ "signumF $dst, $dst" %}
21409 ins_encode %{
21410 int opcode = this->ideal_Opcode();
21411 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21412 %}
21413 ins_pipe( pipe_slow );
21414 %}
21415
21416 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21417 match(Set dst (SignumD dst (Binary zero one)));
21418 effect(KILL cr);
21419 format %{ "signumD $dst, $dst" %}
21420 ins_encode %{
21421 int opcode = this->ideal_Opcode();
21422 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21423 %}
21424 ins_pipe( pipe_slow );
21425 %}
21426
21427 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21428 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21429 match(Set dst (SignumVF src (Binary zero one)));
21430 match(Set dst (SignumVD src (Binary zero one)));
21431 effect(TEMP dst, TEMP xtmp1);
21432 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21433 ins_encode %{
21434 int opcode = this->ideal_Opcode();
21435 int vec_enc = vector_length_encoding(this);
21436 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21437 $xtmp1$$XMMRegister, vec_enc);
21438 %}
21439 ins_pipe( pipe_slow );
21440 %}
21441
21442 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21443 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21444 match(Set dst (SignumVF src (Binary zero one)));
21445 match(Set dst (SignumVD src (Binary zero one)));
21446 effect(TEMP dst, TEMP ktmp1);
21447 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21448 ins_encode %{
21449 int opcode = this->ideal_Opcode();
21450 int vec_enc = vector_length_encoding(this);
21451 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21452 $ktmp1$$KRegister, vec_enc);
21453 %}
21454 ins_pipe( pipe_slow );
21455 %}
21456
21457 // ---------------------------------------
21458 // For copySign use 0xE4 as writemask for vpternlog
21459 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21460 // C (xmm2) is set to 0x7FFFFFFF
21461 // Wherever xmm2 is 0, we want to pick from B (sign)
21462 // Wherever xmm2 is 1, we want to pick from A (src)
21463 //
21464 // A B C Result
21465 // 0 0 0 0
21466 // 0 0 1 0
21467 // 0 1 0 1
21468 // 0 1 1 0
21469 // 1 0 0 0
21470 // 1 0 1 1
21471 // 1 1 0 1
21472 // 1 1 1 1
21473 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21475 // ---------------------------------------
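//
// For illustration, a worked example with hypothetical inputs,
// copySignF(magnitude = 1.0f, sign = -2.0f):
//   A = dst  = 0x3F800000 ( 1.0f, magnitude source)
//   B = src  = 0xC0000000 (-2.0f, sign source)
//   C = tmp1 = 0x7FFFFFFF (mask)
//   vpternlog with imm8 0xE4 computes (A & C) | (B & ~C)
//            = 0x3F800000 | 0x80000000 = 0xBF800000 = -1.0f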
21476
21477 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21478 match(Set dst (CopySignF dst src));
21479 effect(TEMP tmp1, TEMP tmp2);
21480 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21481 ins_encode %{
21482 __ movl($tmp2$$Register, 0x7FFFFFFF);
21483 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21484 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21485 %}
21486 ins_pipe( pipe_slow );
21487 %}
21488
21489 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21490 match(Set dst (CopySignD dst (Binary src zero)));
21491 ins_cost(100);
21492 effect(TEMP tmp1, TEMP tmp2);
21493 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21494 ins_encode %{
21495 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21496 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21497 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21498 %}
21499 ins_pipe( pipe_slow );
21500 %}
21501
21502 //----------------------------- CompressBits/ExpandBits ------------------------
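//
// For illustration, PEXT/PDEP (BMI2) semantics with hypothetical operands:
//   pext(src = 0b10110100, mask = 0b11001100) gathers the src bits selected by
//   the mask (bit positions 7, 6, 3, 2) into the low bits  -> 0b00001001
//   pdep(src = 0b00001001, mask = 0b11001100) scatters the low src bits back
//   into the set positions of the mask                     -> 0b10000100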
21503
21504 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21505 predicate(n->bottom_type()->isa_int());
21506 match(Set dst (CompressBits src mask));
21507 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21508 ins_encode %{
21509 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21510 %}
21511 ins_pipe( pipe_slow );
21512 %}
21513
21514 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21515 predicate(n->bottom_type()->isa_int());
21516 match(Set dst (ExpandBits src mask));
21517 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21518 ins_encode %{
21519 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21520 %}
21521 ins_pipe( pipe_slow );
21522 %}
21523
21524 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21525 predicate(n->bottom_type()->isa_int());
21526 match(Set dst (CompressBits src (LoadI mask)));
21527 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21528 ins_encode %{
21529 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21530 %}
21531 ins_pipe( pipe_slow );
21532 %}
21533
21534 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21535 predicate(n->bottom_type()->isa_int());
21536 match(Set dst (ExpandBits src (LoadI mask)));
21537 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21538 ins_encode %{
21539 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21540 %}
21541 ins_pipe( pipe_slow );
21542 %}
21543
21544 // --------------------------------- Sqrt --------------------------------------
21545
21546 instruct vsqrtF_reg(vec dst, vec src) %{
21547 match(Set dst (SqrtVF src));
21548 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21549 ins_encode %{
21550 assert(UseAVX > 0, "required");
21551 int vlen_enc = vector_length_encoding(this);
21552 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21553 %}
21554 ins_pipe( pipe_slow );
21555 %}
21556
21557 instruct vsqrtF_mem(vec dst, memory mem) %{
21558 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21559 match(Set dst (SqrtVF (LoadVector mem)));
21560 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21561 ins_encode %{
21562 assert(UseAVX > 0, "required");
21563 int vlen_enc = vector_length_encoding(this);
21564 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21565 %}
21566 ins_pipe( pipe_slow );
21567 %}
21568
21569 // Floating point vector sqrt
21570 instruct vsqrtD_reg(vec dst, vec src) %{
21571 match(Set dst (SqrtVD src));
21572 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21573 ins_encode %{
21574 assert(UseAVX > 0, "required");
21575 int vlen_enc = vector_length_encoding(this);
21576 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21577 %}
21578 ins_pipe( pipe_slow );
21579 %}
21580
21581 instruct vsqrtD_mem(vec dst, memory mem) %{
21582 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21583 match(Set dst (SqrtVD (LoadVector mem)));
21584 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21585 ins_encode %{
21586 assert(UseAVX > 0, "required");
21587 int vlen_enc = vector_length_encoding(this);
21588 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21589 %}
21590 ins_pipe( pipe_slow );
21591 %}
21592
21593 // ------------------------------ Shift ---------------------------------------
21594
21595 // Left and right shift count vectors are the same on x86
21596 // (only lowest bits of xmm reg are used for count).
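// For illustration, with a hypothetical count cnt = 3:
//   movdl xmm1, cnt   - xmm1 = 0x0000000000000003
//   psllw xmm0, xmm1  - every 16-bit lane of xmm0 is shifted left by 3
//   psraw xmm0, xmm1  - the same count register also works for right shifts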
21597 instruct vshiftcnt(vec dst, rRegI cnt) %{
21598 match(Set dst (LShiftCntV cnt));
21599 match(Set dst (RShiftCntV cnt));
21600 format %{ "movdl $dst,$cnt\t! load shift count" %}
21601 ins_encode %{
21602 __ movdl($dst$$XMMRegister, $cnt$$Register);
21603 %}
21604 ins_pipe( pipe_slow );
21605 %}
21606
21607 // Byte vector shift
21608 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21609 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21610 match(Set dst ( LShiftVB src shift));
21611 match(Set dst ( RShiftVB src shift));
21612 match(Set dst (URShiftVB src shift));
21613 effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21615 ins_encode %{
21616 assert(UseSSE > 3, "required");
21617 int opcode = this->ideal_Opcode();
21618 bool sign = (opcode != Op_URShiftVB);
21619 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21620 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21621 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21622 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21623 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21624 %}
21625 ins_pipe( pipe_slow );
21626 %}
21627
21628 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21629 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21630 UseAVX <= 1);
21631 match(Set dst ( LShiftVB src shift));
21632 match(Set dst ( RShiftVB src shift));
21633 match(Set dst (URShiftVB src shift));
21634 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21636 ins_encode %{
21637 assert(UseSSE > 3, "required");
21638 int opcode = this->ideal_Opcode();
21639 bool sign = (opcode != Op_URShiftVB);
21640 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21641 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21642 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21643 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21644 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21645 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21646 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21647 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21648 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21649 %}
21650 ins_pipe( pipe_slow );
21651 %}
21652
21653 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21654 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21655 UseAVX > 1);
21656 match(Set dst ( LShiftVB src shift));
21657 match(Set dst ( RShiftVB src shift));
21658 match(Set dst (URShiftVB src shift));
21659 effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21661 ins_encode %{
21662 int opcode = this->ideal_Opcode();
21663 bool sign = (opcode != Op_URShiftVB);
21664 int vlen_enc = Assembler::AVX_256bit;
21665 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21666 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21667 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21668 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21669 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21670 %}
21671 ins_pipe( pipe_slow );
21672 %}
21673
21674 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21675 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21676 match(Set dst ( LShiftVB src shift));
21677 match(Set dst ( RShiftVB src shift));
21678 match(Set dst (URShiftVB src shift));
21679 effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21681 ins_encode %{
21682 assert(UseAVX > 1, "required");
21683 int opcode = this->ideal_Opcode();
21684 bool sign = (opcode != Op_URShiftVB);
21685 int vlen_enc = Assembler::AVX_256bit;
21686 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21687 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21688 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21689 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21690 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21691 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21692 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21693 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21694 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21695 %}
21696 ins_pipe( pipe_slow );
21697 %}
21698
21699 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21700 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21701 match(Set dst ( LShiftVB src shift));
21702 match(Set dst (RShiftVB src shift));
21703 match(Set dst (URShiftVB src shift));
21704 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21706 ins_encode %{
21707 assert(UseAVX > 2, "required");
21708 int opcode = this->ideal_Opcode();
21709 bool sign = (opcode != Op_URShiftVB);
21710 int vlen_enc = Assembler::AVX_512bit;
21711 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21712 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21713 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21714 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21715 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21716 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21717 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21718 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21719 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21720 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21721 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21722 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21723 %}
21724 ins_pipe( pipe_slow );
21725 %}
21726
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts the short value into an int
// with sign extension before shifting. Char vectors are fine, since chars are
// unsigned values.
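// For illustration, with hypothetical data s = (short)-4 and a shift of 1, Java
// evaluates (short)(((int)s) >>> 1) = (short)(0xFFFFFFFC >>> 1) = 0xFFFE (-2),
// while a plain 16-bit lane shift gives 0xFFFC >>> 1 = 0x7FFE (32766).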
// Shorts/Chars vector shift
21732 instruct vshiftS(vec dst, vec src, vec shift) %{
21733 predicate(!n->as_ShiftV()->is_var_shift());
21734 match(Set dst ( LShiftVS src shift));
21735 match(Set dst ( RShiftVS src shift));
21736 match(Set dst (URShiftVS src shift));
21737 effect(TEMP dst, USE src, USE shift);
21738 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21739 ins_encode %{
21740 int opcode = this->ideal_Opcode();
21741 if (UseAVX > 0) {
21742 int vlen_enc = vector_length_encoding(this);
21743 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21744 } else {
21745 int vlen = Matcher::vector_length(this);
21746 if (vlen == 2) {
21747 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21748 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21749 } else if (vlen == 4) {
21750 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21751 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21752 } else {
        assert(vlen == 8, "sanity");
21754 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21755 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21756 }
21757 }
21758 %}
21759 ins_pipe( pipe_slow );
21760 %}
21761
// Integers vector shift
21763 instruct vshiftI(vec dst, vec src, vec shift) %{
21764 predicate(!n->as_ShiftV()->is_var_shift());
21765 match(Set dst ( LShiftVI src shift));
21766 match(Set dst ( RShiftVI src shift));
21767 match(Set dst (URShiftVI src shift));
21768 effect(TEMP dst, USE src, USE shift);
21769 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21770 ins_encode %{
21771 int opcode = this->ideal_Opcode();
21772 if (UseAVX > 0) {
21773 int vlen_enc = vector_length_encoding(this);
21774 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21775 } else {
21776 int vlen = Matcher::vector_length(this);
21777 if (vlen == 2) {
21778 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21779 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21780 } else {
21781 assert(vlen == 4, "sanity");
21782 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21783 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21784 }
21785 }
21786 %}
21787 ins_pipe( pipe_slow );
21788 %}
21789
// Integers vector constant shift
21791 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21792 match(Set dst (LShiftVI src (LShiftCntV shift)));
21793 match(Set dst (RShiftVI src (RShiftCntV shift)));
21794 match(Set dst (URShiftVI src (RShiftCntV shift)));
21795 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21796 ins_encode %{
21797 int opcode = this->ideal_Opcode();
21798 if (UseAVX > 0) {
21799 int vector_len = vector_length_encoding(this);
21800 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21801 } else {
21802 int vlen = Matcher::vector_length(this);
21803 if (vlen == 2) {
21804 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21805 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21806 } else {
21807 assert(vlen == 4, "sanity");
21808 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21809 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21810 }
21811 }
21812 %}
21813 ins_pipe( pipe_slow );
21814 %}
21815
21816 // Longs vector shift
21817 instruct vshiftL(vec dst, vec src, vec shift) %{
21818 predicate(!n->as_ShiftV()->is_var_shift());
21819 match(Set dst ( LShiftVL src shift));
21820 match(Set dst (URShiftVL src shift));
21821 effect(TEMP dst, USE src, USE shift);
21822 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21823 ins_encode %{
21824 int opcode = this->ideal_Opcode();
21825 if (UseAVX > 0) {
21826 int vlen_enc = vector_length_encoding(this);
21827 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21828 } else {
21829 assert(Matcher::vector_length(this) == 2, "");
21830 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21831 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21832 }
21833 %}
21834 ins_pipe( pipe_slow );
21835 %}
21836
21837 // Longs vector constant shift
21838 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21839 match(Set dst (LShiftVL src (LShiftCntV shift)));
21840 match(Set dst (URShiftVL src (RShiftCntV shift)));
21841 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21842 ins_encode %{
21843 int opcode = this->ideal_Opcode();
21844 if (UseAVX > 0) {
21845 int vector_len = vector_length_encoding(this);
21846 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21847 } else {
21848 assert(Matcher::vector_length(this) == 2, "");
21849 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21850 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21851 }
21852 %}
21853 ins_pipe( pipe_slow );
21854 %}
21855
21856 // -------------------ArithmeticRightShift -----------------------------------
21857 // Long vector arithmetic right shift
21858 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21859 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21860 match(Set dst (RShiftVL src shift));
21861 effect(TEMP dst, TEMP tmp);
21862 format %{ "vshiftq $dst,$src,$shift" %}
21863 ins_encode %{
21864 uint vlen = Matcher::vector_length(this);
21865 if (vlen == 2) {
21866 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21867 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21868 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21869 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21870 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21871 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21872 } else {
21873 assert(vlen == 4, "sanity");
21874 assert(UseAVX > 1, "required");
21875 int vlen_enc = Assembler::AVX_256bit;
21876 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21877 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21878 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21879 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21880 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21881 }
21882 %}
21883 ins_pipe( pipe_slow );
21884 %}
21885
21886 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21887 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21888 match(Set dst (RShiftVL src shift));
21889 format %{ "vshiftq $dst,$src,$shift" %}
21890 ins_encode %{
21891 int vlen_enc = vector_length_encoding(this);
21892 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21893 %}
21894 ins_pipe( pipe_slow );
21895 %}
21896
21897 // ------------------- Variable Shift -----------------------------
21898 // Byte variable shift
21899 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21900 predicate(Matcher::vector_length(n) <= 8 &&
21901 n->as_ShiftV()->is_var_shift() &&
21902 !VM_Version::supports_avx512bw());
21903 match(Set dst ( LShiftVB src shift));
21904 match(Set dst ( RShiftVB src shift));
21905 match(Set dst (URShiftVB src shift));
21906 effect(TEMP dst, TEMP vtmp);
21907 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21908 ins_encode %{
21909 assert(UseAVX >= 2, "required");
21910
21911 int opcode = this->ideal_Opcode();
21912 int vlen_enc = Assembler::AVX_128bit;
21913 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21914 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21915 %}
21916 ins_pipe( pipe_slow );
21917 %}
21918
21919 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21920 predicate(Matcher::vector_length(n) == 16 &&
21921 n->as_ShiftV()->is_var_shift() &&
21922 !VM_Version::supports_avx512bw());
21923 match(Set dst ( LShiftVB src shift));
21924 match(Set dst ( RShiftVB src shift));
21925 match(Set dst (URShiftVB src shift));
21926 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21927 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21928 ins_encode %{
21929 assert(UseAVX >= 2, "required");
21930
21931 int opcode = this->ideal_Opcode();
21932 int vlen_enc = Assembler::AVX_128bit;
21933 // Shift lower half and get word result in dst
21934 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21935
21936 // Shift upper half and get word result in vtmp1
21937 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21938 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21939 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21940
21941 // Merge and down convert the two word results to byte in dst
21942 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21943 %}
21944 ins_pipe( pipe_slow );
21945 %}
21946
21947 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21948 predicate(Matcher::vector_length(n) == 32 &&
21949 n->as_ShiftV()->is_var_shift() &&
21950 !VM_Version::supports_avx512bw());
21951 match(Set dst ( LShiftVB src shift));
21952 match(Set dst ( RShiftVB src shift));
21953 match(Set dst (URShiftVB src shift));
21954 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21956 ins_encode %{
21957 assert(UseAVX >= 2, "required");
21958
21959 int opcode = this->ideal_Opcode();
21960 int vlen_enc = Assembler::AVX_128bit;
21961 // Process lower 128 bits and get result in dst
21962 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21963 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21964 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21965 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21966 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21967
21968 // Process higher 128 bits and get result in vtmp3
21969 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21970 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21971 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21972 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21973 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21974 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21975 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21976
21977 // Merge the two results in dst
21978 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21979 %}
21980 ins_pipe( pipe_slow );
21981 %}
21982
21983 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21984 predicate(Matcher::vector_length(n) <= 32 &&
21985 n->as_ShiftV()->is_var_shift() &&
21986 VM_Version::supports_avx512bw());
21987 match(Set dst ( LShiftVB src shift));
21988 match(Set dst ( RShiftVB src shift));
21989 match(Set dst (URShiftVB src shift));
21990 effect(TEMP dst, TEMP vtmp);
21991 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21992 ins_encode %{
21993 assert(UseAVX > 2, "required");
21994
21995 int opcode = this->ideal_Opcode();
21996 int vlen_enc = vector_length_encoding(this);
21997 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21998 %}
21999 ins_pipe( pipe_slow );
22000 %}
22001
22002 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22003 predicate(Matcher::vector_length(n) == 64 &&
22004 n->as_ShiftV()->is_var_shift() &&
22005 VM_Version::supports_avx512bw());
22006 match(Set dst ( LShiftVB src shift));
22007 match(Set dst ( RShiftVB src shift));
22008 match(Set dst (URShiftVB src shift));
22009 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22010 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
22011 ins_encode %{
22012 assert(UseAVX > 2, "required");
22013
22014 int opcode = this->ideal_Opcode();
22015 int vlen_enc = Assembler::AVX_256bit;
22016 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
22017 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
22018 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
22019 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
22020 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
22021 %}
22022 ins_pipe( pipe_slow );
22023 %}
22024
22025 // Short variable shift
22026 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
22027 predicate(Matcher::vector_length(n) <= 8 &&
22028 n->as_ShiftV()->is_var_shift() &&
22029 !VM_Version::supports_avx512bw());
22030 match(Set dst ( LShiftVS src shift));
22031 match(Set dst ( RShiftVS src shift));
22032 match(Set dst (URShiftVS src shift));
22033 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp as TEMP" %}
22035 ins_encode %{
22036 assert(UseAVX >= 2, "required");
22037
22038 int opcode = this->ideal_Opcode();
22039 bool sign = (opcode != Op_URShiftVS);
22040 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22043 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22044 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22045 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22046 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22047 %}
22048 ins_pipe( pipe_slow );
22049 %}
22050
22051 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22052 predicate(Matcher::vector_length(n) == 16 &&
22053 n->as_ShiftV()->is_var_shift() &&
22054 !VM_Version::supports_avx512bw());
22055 match(Set dst ( LShiftVS src shift));
22056 match(Set dst ( RShiftVS src shift));
22057 match(Set dst (URShiftVS src shift));
22058 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\t! using $vtmp1, $vtmp2 as TEMP" %}
22060 ins_encode %{
22061 assert(UseAVX >= 2, "required");
22062
22063 int opcode = this->ideal_Opcode();
22064 bool sign = (opcode != Op_URShiftVS);
22065 int vlen_enc = Assembler::AVX_256bit;
22066 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22067 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22068 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22069 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22070 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22071
22072 // Shift upper half, with result in dst using vtmp1 as TEMP
22073 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22074 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22075 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22076 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22077 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22078 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22079
22080 // Merge lower and upper half result into dst
22081 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22082 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22083 %}
22084 ins_pipe( pipe_slow );
22085 %}
22086
22087 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22088 predicate(n->as_ShiftV()->is_var_shift() &&
22089 VM_Version::supports_avx512bw());
22090 match(Set dst ( LShiftVS src shift));
22091 match(Set dst ( RShiftVS src shift));
22092 match(Set dst (URShiftVS src shift));
22093 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22094 ins_encode %{
22095 assert(UseAVX > 2, "required");
22096
22097 int opcode = this->ideal_Opcode();
22098 int vlen_enc = vector_length_encoding(this);
22099 if (!VM_Version::supports_avx512vl()) {
22100 vlen_enc = Assembler::AVX_512bit;
22101 }
22102 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22103 %}
22104 ins_pipe( pipe_slow );
22105 %}
22106
// Integer variable shift
22108 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22109 predicate(n->as_ShiftV()->is_var_shift());
22110 match(Set dst ( LShiftVI src shift));
22111 match(Set dst ( RShiftVI src shift));
22112 match(Set dst (URShiftVI src shift));
22113 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22114 ins_encode %{
22115 assert(UseAVX >= 2, "required");
22116
22117 int opcode = this->ideal_Opcode();
22118 int vlen_enc = vector_length_encoding(this);
22119 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22120 %}
22121 ins_pipe( pipe_slow );
22122 %}
22123
// Long variable shift
22125 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22126 predicate(n->as_ShiftV()->is_var_shift());
22127 match(Set dst ( LShiftVL src shift));
22128 match(Set dst (URShiftVL src shift));
22129 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22130 ins_encode %{
22131 assert(UseAVX >= 2, "required");
22132
22133 int opcode = this->ideal_Opcode();
22134 int vlen_enc = vector_length_encoding(this);
22135 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22136 %}
22137 ins_pipe( pipe_slow );
22138 %}
22139
// Long variable arithmetic right shift
22141 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22142 predicate(Matcher::vector_length(n) <= 4 &&
22143 n->as_ShiftV()->is_var_shift() &&
22144 UseAVX == 2);
22145 match(Set dst (RShiftVL src shift));
22146 effect(TEMP dst, TEMP vtmp);
22147 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22148 ins_encode %{
22149 int opcode = this->ideal_Opcode();
22150 int vlen_enc = vector_length_encoding(this);
22151 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22152 $vtmp$$XMMRegister);
22153 %}
22154 ins_pipe( pipe_slow );
22155 %}
22156
22157 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22158 predicate(n->as_ShiftV()->is_var_shift() &&
22159 UseAVX > 2);
22160 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22162 ins_encode %{
22163 int opcode = this->ideal_Opcode();
22164 int vlen_enc = vector_length_encoding(this);
22165 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22166 %}
22167 ins_pipe( pipe_slow );
22168 %}
22169
22170 // --------------------------------- AND --------------------------------------
22171
22172 instruct vand(vec dst, vec src) %{
22173 predicate(UseAVX == 0);
22174 match(Set dst (AndV dst src));
22175 format %{ "pand $dst,$src\t! and vectors" %}
22176 ins_encode %{
22177 __ pand($dst$$XMMRegister, $src$$XMMRegister);
22178 %}
22179 ins_pipe( pipe_slow );
22180 %}
22181
22182 instruct vand_reg(vec dst, vec src1, vec src2) %{
22183 predicate(UseAVX > 0);
22184 match(Set dst (AndV src1 src2));
22185 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22186 ins_encode %{
22187 int vlen_enc = vector_length_encoding(this);
22188 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22189 %}
22190 ins_pipe( pipe_slow );
22191 %}
22192
22193 instruct vand_mem(vec dst, vec src, memory mem) %{
22194 predicate((UseAVX > 0) &&
22195 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22196 match(Set dst (AndV src (LoadVector mem)));
22197 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22198 ins_encode %{
22199 int vlen_enc = vector_length_encoding(this);
22200 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22201 %}
22202 ins_pipe( pipe_slow );
22203 %}
22204
22205 // --------------------------------- OR ---------------------------------------
22206
22207 instruct vor(vec dst, vec src) %{
22208 predicate(UseAVX == 0);
22209 match(Set dst (OrV dst src));
22210 format %{ "por $dst,$src\t! or vectors" %}
22211 ins_encode %{
22212 __ por($dst$$XMMRegister, $src$$XMMRegister);
22213 %}
22214 ins_pipe( pipe_slow );
22215 %}
22216
22217 instruct vor_reg(vec dst, vec src1, vec src2) %{
22218 predicate(UseAVX > 0);
22219 match(Set dst (OrV src1 src2));
22220 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22221 ins_encode %{
22222 int vlen_enc = vector_length_encoding(this);
22223 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22224 %}
22225 ins_pipe( pipe_slow );
22226 %}
22227
22228 instruct vor_mem(vec dst, vec src, memory mem) %{
22229 predicate((UseAVX > 0) &&
22230 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22231 match(Set dst (OrV src (LoadVector mem)));
22232 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22233 ins_encode %{
22234 int vlen_enc = vector_length_encoding(this);
22235 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22236 %}
22237 ins_pipe( pipe_slow );
22238 %}
22239
22240 // --------------------------------- XOR --------------------------------------
22241
22242 instruct vxor(vec dst, vec src) %{
22243 predicate(UseAVX == 0);
22244 match(Set dst (XorV dst src));
22245 format %{ "pxor $dst,$src\t! xor vectors" %}
22246 ins_encode %{
22247 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22248 %}
22249 ins_pipe( pipe_slow );
22250 %}
22251
22252 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22253 predicate(UseAVX > 0);
22254 match(Set dst (XorV src1 src2));
22255 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22256 ins_encode %{
22257 int vlen_enc = vector_length_encoding(this);
22258 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22259 %}
22260 ins_pipe( pipe_slow );
22261 %}
22262
22263 instruct vxor_mem(vec dst, vec src, memory mem) %{
22264 predicate((UseAVX > 0) &&
22265 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22266 match(Set dst (XorV src (LoadVector mem)));
22267 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22268 ins_encode %{
22269 int vlen_enc = vector_length_encoding(this);
22270 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22271 %}
22272 ins_pipe( pipe_slow );
22273 %}
22274
22275 // --------------------------------- VectorCast --------------------------------------
22276
22277 instruct vcastBtoX(vec dst, vec src) %{
22278 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22279 match(Set dst (VectorCastB2X src));
22280 format %{ "vector_cast_b2x $dst,$src\t!" %}
22281 ins_encode %{
22282 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22283 int vlen_enc = vector_length_encoding(this);
22284 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22285 %}
22286 ins_pipe( pipe_slow );
22287 %}
22288
22289 instruct vcastBtoD(legVec dst, legVec src) %{
22290 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22291 match(Set dst (VectorCastB2X src));
22292 format %{ "vector_cast_b2x $dst,$src\t!" %}
22293 ins_encode %{
22294 int vlen_enc = vector_length_encoding(this);
22295 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22296 %}
22297 ins_pipe( pipe_slow );
22298 %}
22299
22300 instruct castStoX(vec dst, vec src) %{
22301 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22302 Matcher::vector_length(n->in(1)) <= 8 && // src
22303 Matcher::vector_element_basic_type(n) == T_BYTE);
22304 match(Set dst (VectorCastS2X src));
22305 format %{ "vector_cast_s2x $dst,$src" %}
22306 ins_encode %{
22307 assert(UseAVX > 0, "required");
22308
22309 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22310 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22311 %}
22312 ins_pipe( pipe_slow );
22313 %}
22314
22315 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22316 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22317 Matcher::vector_length(n->in(1)) == 16 && // src
22318 Matcher::vector_element_basic_type(n) == T_BYTE);
22319 effect(TEMP dst, TEMP vtmp);
22320 match(Set dst (VectorCastS2X src));
22321 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22322 ins_encode %{
22323 assert(UseAVX > 0, "required");
22324
22325 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22326 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22327 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22328 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22329 %}
22330 ins_pipe( pipe_slow );
22331 %}
22332
22333 instruct vcastStoX_evex(vec dst, vec src) %{
22334 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22335 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22336 match(Set dst (VectorCastS2X src));
22337 format %{ "vector_cast_s2x $dst,$src\t!" %}
22338 ins_encode %{
22339 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22340 int src_vlen_enc = vector_length_encoding(this, $src);
22341 int vlen_enc = vector_length_encoding(this);
22342 switch (to_elem_bt) {
22343 case T_BYTE:
22344 if (!VM_Version::supports_avx512vl()) {
22345 vlen_enc = Assembler::AVX_512bit;
22346 }
22347 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22348 break;
22349 case T_INT:
22350 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22351 break;
22352 case T_FLOAT:
22353 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22354 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22355 break;
22356 case T_LONG:
22357 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22358 break;
22359 case T_DOUBLE: {
22360 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22361 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22362 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22363 break;
22364 }
22365 default:
22366 ShouldNotReachHere();
22367 }
22368 %}
22369 ins_pipe( pipe_slow );
22370 %}
22371
22372 instruct castItoX(vec dst, vec src) %{
22373 predicate(UseAVX <= 2 &&
22374 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22375 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22376 match(Set dst (VectorCastI2X src));
22377 format %{ "vector_cast_i2x $dst,$src" %}
22378 ins_encode %{
22379 assert(UseAVX > 0, "required");
22380
22381 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22382 int vlen_enc = vector_length_encoding(this, $src);
22383
22384 if (to_elem_bt == T_BYTE) {
22385 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22386 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22387 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22388 } else {
22389 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22390 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22391 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22392 }
22393 %}
22394 ins_pipe( pipe_slow );
22395 %}
22396
22397 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22398 predicate(UseAVX <= 2 &&
22399 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22400 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22401 match(Set dst (VectorCastI2X src));
22402 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22403 effect(TEMP dst, TEMP vtmp);
22404 ins_encode %{
22405 assert(UseAVX > 0, "required");
22406
22407 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22408 int vlen_enc = vector_length_encoding(this, $src);
22409
22410 if (to_elem_bt == T_BYTE) {
22411 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22412 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22413 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22414 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22415 } else {
22416 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22417 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22418 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22419 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22420 }
22421 %}
22422 ins_pipe( pipe_slow );
22423 %}
22424
22425 instruct vcastItoX_evex(vec dst, vec src) %{
22426 predicate(UseAVX > 2 ||
22427 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22428 match(Set dst (VectorCastI2X src));
22429 format %{ "vector_cast_i2x $dst,$src\t!" %}
22430 ins_encode %{
22431 assert(UseAVX > 0, "required");
22432
22433 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22434 int src_vlen_enc = vector_length_encoding(this, $src);
22435 int dst_vlen_enc = vector_length_encoding(this);
22436 switch (dst_elem_bt) {
22437 case T_BYTE:
22438 if (!VM_Version::supports_avx512vl()) {
22439 src_vlen_enc = Assembler::AVX_512bit;
22440 }
22441 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22442 break;
22443 case T_SHORT:
22444 if (!VM_Version::supports_avx512vl()) {
22445 src_vlen_enc = Assembler::AVX_512bit;
22446 }
22447 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22448 break;
22449 case T_FLOAT:
22450 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22451 break;
22452 case T_LONG:
22453 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22454 break;
22455 case T_DOUBLE:
22456 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22457 break;
22458 default:
22459 ShouldNotReachHere();
22460 }
22461 %}
22462 ins_pipe( pipe_slow );
22463 %}
22464
22465 instruct vcastLtoBS(vec dst, vec src) %{
22466 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22467 UseAVX <= 2);
22468 match(Set dst (VectorCastL2X src));
22469 format %{ "vector_cast_l2x $dst,$src" %}
22470 ins_encode %{
22471 assert(UseAVX > 0, "required");
22472
22473 int vlen = Matcher::vector_length_in_bytes(this, $src);
22474 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22475 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22476 : ExternalAddress(vector_int_to_short_mask());
22477 if (vlen <= 16) {
22478 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22479 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22480 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22481 } else {
22482 assert(vlen <= 32, "required");
22483 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22484 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22485 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22486 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22487 }
22488 if (to_elem_bt == T_BYTE) {
22489 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22490 }
22491 %}
22492 ins_pipe( pipe_slow );
22493 %}
22494
22495 instruct vcastLtoX_evex(vec dst, vec src) %{
22496 predicate(UseAVX > 2 ||
22497 (Matcher::vector_element_basic_type(n) == T_INT ||
22498 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22499 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22500 match(Set dst (VectorCastL2X src));
22501 format %{ "vector_cast_l2x $dst,$src\t!" %}
22502 ins_encode %{
22503 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22504 int vlen = Matcher::vector_length_in_bytes(this, $src);
22505 int vlen_enc = vector_length_encoding(this, $src);
22506 switch (to_elem_bt) {
22507 case T_BYTE:
22508 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22509 vlen_enc = Assembler::AVX_512bit;
22510 }
22511 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22512 break;
22513 case T_SHORT:
22514 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22515 vlen_enc = Assembler::AVX_512bit;
22516 }
22517 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22518 break;
22519 case T_INT:
22520 if (vlen == 8) {
22521 if ($dst$$XMMRegister != $src$$XMMRegister) {
22522 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22523 }
22524 } else if (vlen == 16) {
22525 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22526 } else if (vlen == 32) {
22527 if (UseAVX > 2) {
22528 if (!VM_Version::supports_avx512vl()) {
22529 vlen_enc = Assembler::AVX_512bit;
22530 }
22531 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22532 } else {
22533 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22534 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22535 }
22536 } else { // vlen == 64
22537 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22538 }
22539 break;
22540 case T_FLOAT:
22541 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22542 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22543 break;
22544 case T_DOUBLE:
22545 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22546 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22547 break;
22548
22549 default: assert(false, "%s", type2name(to_elem_bt));
22550 }
22551 %}
22552 ins_pipe( pipe_slow );
22553 %}
22554
22555 instruct vcastFtoD_reg(vec dst, vec src) %{
22556 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22557 match(Set dst (VectorCastF2X src));
22558 format %{ "vector_cast_f2d $dst,$src\t!" %}
22559 ins_encode %{
22560 int vlen_enc = vector_length_encoding(this);
22561 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22562 %}
22563 ins_pipe( pipe_slow );
22564 %}
22565
22567 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22568 predicate(!VM_Version::supports_avx10_2() &&
22569 !VM_Version::supports_avx512vl() &&
22570 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22571 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22572 is_integral_type(Matcher::vector_element_basic_type(n)));
22573 match(Set dst (VectorCastF2X src));
22574 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22575 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22576 ins_encode %{
22577 int vlen_enc = vector_length_encoding(this, $src);
22578 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses
    // wider than 32 bits in register-indirect addressing mode, since stub constants are
    // part of the code cache and ReservedCodeCacheSize is currently capped at 2G.
    // Targets are free to raise that limit, but a code cache larger than 2G looks
    // unreasonable in any practical scenario. On the upside, with the given cap we save
    // a temporary register allocation, which in the limiting case can prevent spilling
    // in blocks with high register pressure.
22586 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22587 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22588 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22589 %}
22590 ins_pipe( pipe_slow );
22591 %}
22592
22593 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22594 predicate(!VM_Version::supports_avx10_2() &&
22595 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22596 is_integral_type(Matcher::vector_element_basic_type(n)));
22597 match(Set dst (VectorCastF2X src));
22598 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22599 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22600 ins_encode %{
22601 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22602 if (to_elem_bt == T_LONG) {
22603 int vlen_enc = vector_length_encoding(this);
22604 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22605 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22606 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22607 } else {
22608 int vlen_enc = vector_length_encoding(this, $src);
22609 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22610 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22611 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22612 }
22613 %}
22614 ins_pipe( pipe_slow );
22615 %}
22616
22617 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22618 predicate(VM_Version::supports_avx10_2() &&
22619 is_integral_type(Matcher::vector_element_basic_type(n)));
22620 match(Set dst (VectorCastF2X src));
22621 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22622 ins_encode %{
22623 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22624 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22625 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22626 %}
22627 ins_pipe( pipe_slow );
22628 %}
22629
22630 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22631 predicate(VM_Version::supports_avx10_2() &&
22632 is_integral_type(Matcher::vector_element_basic_type(n)));
22633 match(Set dst (VectorCastF2X (LoadVector src)));
22634 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22635 ins_encode %{
22636 int vlen = Matcher::vector_length(this);
22637 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22638 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22639 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22640 %}
22641 ins_pipe( pipe_slow );
22642 %}
22643
22644 instruct vcastDtoF_reg(vec dst, vec src) %{
22645 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22646 match(Set dst (VectorCastD2X src));
22647 format %{ "vector_cast_d2x $dst,$src\t!" %}
22648 ins_encode %{
22649 int vlen_enc = vector_length_encoding(this, $src);
22650 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22651 %}
22652 ins_pipe( pipe_slow );
22653 %}
22654
22655 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22656 predicate(!VM_Version::supports_avx10_2() &&
22657 !VM_Version::supports_avx512vl() &&
22658 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22659 is_integral_type(Matcher::vector_element_basic_type(n)));
22660 match(Set dst (VectorCastD2X src));
22661 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22662 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22663 ins_encode %{
22664 int vlen_enc = vector_length_encoding(this, $src);
22665 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22666 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22667 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22668 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22669 %}
22670 ins_pipe( pipe_slow );
22671 %}
22672
22673 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22674 predicate(!VM_Version::supports_avx10_2() &&
22675 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22676 is_integral_type(Matcher::vector_element_basic_type(n)));
22677 match(Set dst (VectorCastD2X src));
22678 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22679 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22680 ins_encode %{
22681 int vlen_enc = vector_length_encoding(this, $src);
22682 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22683 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22684 ExternalAddress(vector_float_signflip());
22685 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22686 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22687 %}
22688 ins_pipe( pipe_slow );
22689 %}
22690
22691 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22692 predicate(VM_Version::supports_avx10_2() &&
22693 is_integral_type(Matcher::vector_element_basic_type(n)));
22694 match(Set dst (VectorCastD2X src));
22695 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22696 ins_encode %{
22697 int vlen_enc = vector_length_encoding(this, $src);
22698 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22699 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22700 %}
22701 ins_pipe( pipe_slow );
22702 %}
22703
22704 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22705 predicate(VM_Version::supports_avx10_2() &&
22706 is_integral_type(Matcher::vector_element_basic_type(n)));
22707 match(Set dst (VectorCastD2X (LoadVector src)));
22708 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22709 ins_encode %{
22710 int vlen = Matcher::vector_length(this);
22711 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22712 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22713 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22714 %}
22715 ins_pipe( pipe_slow );
22716 %}
22717
22718 instruct vucast(vec dst, vec src) %{
22719 match(Set dst (VectorUCastB2X src));
22720 match(Set dst (VectorUCastS2X src));
22721 match(Set dst (VectorUCastI2X src));
22722 format %{ "vector_ucast $dst,$src\t!" %}
22723 ins_encode %{
22724 assert(UseAVX > 0, "required");
22725
22726 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22727 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22728 int vlen_enc = vector_length_encoding(this);
22729 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22730 %}
22731 ins_pipe( pipe_slow );
22732 %}
22733
22734 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22735 predicate(!VM_Version::supports_avx512vl() &&
22736 Matcher::vector_length_in_bytes(n) < 64 &&
22737 Matcher::vector_element_basic_type(n) == T_INT);
22738 match(Set dst (RoundVF src));
22739 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22740 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22741 ins_encode %{
22742 int vlen_enc = vector_length_encoding(this);
22743 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22744 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22745 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22746 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22747 %}
22748 ins_pipe( pipe_slow );
22749 %}
22750
22751 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22752 predicate((VM_Version::supports_avx512vl() ||
22753 Matcher::vector_length_in_bytes(n) == 64) &&
22754 Matcher::vector_element_basic_type(n) == T_INT);
22755 match(Set dst (RoundVF src));
22756 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22757 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22758 ins_encode %{
22759 int vlen_enc = vector_length_encoding(this);
22760 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22761 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22762 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22763 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22764 %}
22765 ins_pipe( pipe_slow );
22766 %}
22767
22768 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22769 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22770 match(Set dst (RoundVD src));
22771 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22772 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22773 ins_encode %{
22774 int vlen_enc = vector_length_encoding(this);
22775 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22776 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22777 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22778 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22779 %}
22780 ins_pipe( pipe_slow );
22781 %}
22782
22783 // --------------------------------- VectorMaskCmp --------------------------------------
22784
22785 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22786 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22787 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22788 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22789 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22790 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22791 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22792 ins_encode %{
22793 int vlen_enc = vector_length_encoding(this, $src1);
22794 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22795 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22796 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22797 } else {
22798 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22799 }
22800 %}
22801 ins_pipe( pipe_slow );
22802 %}
22803
22804 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22805 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22806 n->bottom_type()->isa_vectmask() == nullptr &&
22807 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22808 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22809 effect(TEMP ktmp);
22810 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22811 ins_encode %{
22812 int vlen_enc = Assembler::AVX_512bit;
22813 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22814 KRegister mask = k0; // The comparison itself is not being masked.
22815 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22816 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22817 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22818 } else {
22819 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22820 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22821 }
22822 %}
22823 ins_pipe( pipe_slow );
22824 %}
22825
22826 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22827 predicate(n->bottom_type()->isa_vectmask() &&
22828 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22829 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22830 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22831 ins_encode %{
22832 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22833 int vlen_enc = vector_length_encoding(this, $src1);
22834 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22835 KRegister mask = k0; // The comparison itself is not being masked.
22836 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22837 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22838 } else {
22839 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22840 }
22841 %}
22842 ins_pipe( pipe_slow );
22843 %}
22844
22845 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22846 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22847 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22848 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22849 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22850 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22851 (n->in(2)->get_int() == BoolTest::eq ||
22852 n->in(2)->get_int() == BoolTest::lt ||
22853 n->in(2)->get_int() == BoolTest::gt)); // cond
22854 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22855 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22856 ins_encode %{
22857 int vlen_enc = vector_length_encoding(this, $src1);
22858 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22859 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22860 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22861 %}
22862 ins_pipe( pipe_slow );
22863 %}
22864
22865 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22866 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22867 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22868 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22869 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22870 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22871 (n->in(2)->get_int() == BoolTest::ne ||
22872 n->in(2)->get_int() == BoolTest::le ||
22873 n->in(2)->get_int() == BoolTest::ge)); // cond
22874 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22875 effect(TEMP dst, TEMP xtmp);
22876 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22877 ins_encode %{
22878 int vlen_enc = vector_length_encoding(this, $src1);
22879 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22880 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22881 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22882 %}
22883 ins_pipe( pipe_slow );
22884 %}
22885
22886 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22887 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22888 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22889 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22890 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22891 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22892 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22893 effect(TEMP dst, TEMP xtmp);
22894 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22895 ins_encode %{
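    // Reduce the unsigned compare to a signed one by flipping the sign bit
    // of both operands: x <u y  <==>  (x ^ SIGN_MIN) <s (y ^ SIGN_MIN).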
22896 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22897 int vlen_enc = vector_length_encoding(this, $src1);
22898 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22899 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22900
22901 if (vlen_enc == Assembler::AVX_128bit) {
22902 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22903 } else {
22904 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22905 }
22906 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22907 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22908 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22909 %}
22910 ins_pipe( pipe_slow );
22911 %}
22912
22913 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22914 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22915 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22916 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22917 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22918 effect(TEMP ktmp);
22919 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22920 ins_encode %{
22921 assert(UseAVX > 2, "required");
22922
22923 int vlen_enc = vector_length_encoding(this, $src1);
22924 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22925 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22926 KRegister mask = k0; // The comparison itself is not being masked.
22927 bool merge = false;
22928 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22929
22930 switch (src1_elem_bt) {
22931 case T_INT: {
22932 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22933 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22934 break;
22935 }
22936 case T_LONG: {
22937 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22938 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22939 break;
22940 }
22941 default: assert(false, "%s", type2name(src1_elem_bt));
22942 }
22943 %}
22944 ins_pipe( pipe_slow );
22945 %}
22946
22947
22948 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22949 predicate(n->bottom_type()->isa_vectmask() &&
22950 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22951 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22952 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22953 ins_encode %{
22954 assert(UseAVX > 2, "required");
22955 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22956
22957 int vlen_enc = vector_length_encoding(this, $src1);
22958 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22959 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22960 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22961
    // Unmasked comparison (k0); the instruction is selected by the source element type.
22963 switch (src1_elem_bt) {
22964 case T_BYTE: {
22965 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22966 break;
22967 }
22968 case T_SHORT: {
22969 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22970 break;
22971 }
22972 case T_INT: {
22973 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22974 break;
22975 }
22976 case T_LONG: {
22977 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22978 break;
22979 }
22980 default: assert(false, "%s", type2name(src1_elem_bt));
22981 }
22982 %}
22983 ins_pipe( pipe_slow );
22984 %}
22985
22986 // Extract
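// Extract reads the element at a compile-time constant index out of a
// vector into a scalar (or scalar FP) register, i.e. dst = src[idx].
// For vectors wider than 128 bits, get_lane first copies the 128-bit lane
// holding the element into a temporary and get_elem then selects the
// element within that lane.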
22987
22988 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22989 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22990 match(Set dst (ExtractI src idx));
22991 match(Set dst (ExtractS src idx));
22992 match(Set dst (ExtractB src idx));
22993 format %{ "extractI $dst,$src,$idx\t!" %}
22994 ins_encode %{
22995 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22996
22997 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22998 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22999 %}
23000 ins_pipe( pipe_slow );
23001 %}
23002
23003 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
23004 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
23005 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
23006 match(Set dst (ExtractI src idx));
23007 match(Set dst (ExtractS src idx));
23008 match(Set dst (ExtractB src idx));
23009 effect(TEMP vtmp);
23010 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
23011 ins_encode %{
23012 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23013
23014 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23015 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23016 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
23017 %}
23018 ins_pipe( pipe_slow );
23019 %}
23020
23021 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
23022 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
23023 match(Set dst (ExtractL src idx));
23024 format %{ "extractL $dst,$src,$idx\t!" %}
23025 ins_encode %{
23026 assert(UseSSE >= 4, "required");
23027 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23028
23029 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
23030 %}
23031 ins_pipe( pipe_slow );
23032 %}
23033
23034 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23035 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23036 Matcher::vector_length(n->in(1)) == 8); // src
23037 match(Set dst (ExtractL src idx));
23038 effect(TEMP vtmp);
23039 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23040 ins_encode %{
23041 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23042
23043 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23044 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23045 %}
23046 ins_pipe( pipe_slow );
23047 %}
23048
23049 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23050 predicate(Matcher::vector_length(n->in(1)) <= 4);
23051 match(Set dst (ExtractF src idx));
23052 effect(TEMP dst, TEMP vtmp);
23053 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23054 ins_encode %{
23055 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23056
23057 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23058 %}
23059 ins_pipe( pipe_slow );
23060 %}
23061
23062 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23063 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
23064 Matcher::vector_length(n->in(1)/*src*/) == 16);
23065 match(Set dst (ExtractF src idx));
23066 effect(TEMP vtmp);
23067 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23068 ins_encode %{
23069 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23070
23071 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23072 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23073 %}
23074 ins_pipe( pipe_slow );
23075 %}
23076
23077 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23078 predicate(Matcher::vector_length(n->in(1)) == 2); // src
23079 match(Set dst (ExtractD src idx));
23080 format %{ "extractD $dst,$src,$idx\t!" %}
23081 ins_encode %{
23082 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23083
23084 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23085 %}
23086 ins_pipe( pipe_slow );
23087 %}
23088
23089 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23090 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23091 Matcher::vector_length(n->in(1)) == 8); // src
23092 match(Set dst (ExtractD src idx));
23093 effect(TEMP vtmp);
23094 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23095 ins_encode %{
23096 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23097
23098 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23099 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23100 %}
23101 ins_pipe( pipe_slow );
23102 %}
23103
23104 // --------------------------------- Vector Blend --------------------------------------
23105
23106 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23107 predicate(UseAVX == 0);
23108 match(Set dst (VectorBlend (Binary dst src) mask));
23109 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
23110 effect(TEMP tmp);
23111 ins_encode %{
23112 assert(UseSSE >= 4, "required");
23113
23114 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23115 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23116 }
23117 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23118 %}
23119 ins_pipe( pipe_slow );
23120 %}
23121
23122 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23123 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23124 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23125 Matcher::vector_length_in_bytes(n) <= 32 &&
23126 is_integral_type(Matcher::vector_element_basic_type(n)));
23127 match(Set dst (VectorBlend (Binary src1 src2) mask));
23128 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23129 ins_encode %{
23130 int vlen_enc = vector_length_encoding(this);
23131 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23132 %}
23133 ins_pipe( pipe_slow );
23134 %}
23135
23136 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23137 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23138 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23139 Matcher::vector_length_in_bytes(n) <= 32 &&
23140 !is_integral_type(Matcher::vector_element_basic_type(n)));
23141 match(Set dst (VectorBlend (Binary src1 src2) mask));
23142 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23143 ins_encode %{
23144 int vlen_enc = vector_length_encoding(this);
23145 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23146 %}
23147 ins_pipe( pipe_slow );
23148 %}
23149
23150 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23151 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23152 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
23153 Matcher::vector_length_in_bytes(n) <= 32);
23154 match(Set dst (VectorBlend (Binary src1 src2) mask));
23155 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23156 effect(TEMP vtmp, TEMP dst);
23157 ins_encode %{
23158 int vlen_enc = vector_length_encoding(this);
23159 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23160 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23161 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23162 %}
23163 ins_pipe( pipe_slow );
23164 %}
23165
23166 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23167 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23168 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
23169 match(Set dst (VectorBlend (Binary src1 src2) mask));
23170 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23171 effect(TEMP ktmp);
23172 ins_encode %{
23173 int vlen_enc = Assembler::AVX_512bit;
23174 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23175 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23176 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23177 %}
23178 ins_pipe( pipe_slow );
23179 %}
23180
23181
23182 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23183 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
23184 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23185 VM_Version::supports_avx512bw()));
23186 match(Set dst (VectorBlend (Binary src1 src2) mask));
23187 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23188 ins_encode %{
23189 int vlen_enc = vector_length_encoding(this);
23190 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23191 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23192 %}
23193 ins_pipe( pipe_slow );
23194 %}
23195
23196 // --------------------------------- ABS --------------------------------------
23197 // a = |a|
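// dst[i] = (src[i] < 0) ? -src[i] : src[i]. As with Java's Math.abs, the
// most negative value of an integral lane (e.g. 0x80 for a byte) maps to
// itself.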
23198 instruct vabsB_reg(vec dst, vec src) %{
23199 match(Set dst (AbsVB src));
23200 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23201 ins_encode %{
23202 uint vlen = Matcher::vector_length(this);
23203 if (vlen <= 16) {
23204 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23205 } else {
23206 int vlen_enc = vector_length_encoding(this);
23207 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23208 }
23209 %}
23210 ins_pipe( pipe_slow );
23211 %}
23212
23213 instruct vabsS_reg(vec dst, vec src) %{
23214 match(Set dst (AbsVS src));
23215 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23216 ins_encode %{
23217 uint vlen = Matcher::vector_length(this);
23218 if (vlen <= 8) {
23219 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23220 } else {
23221 int vlen_enc = vector_length_encoding(this);
23222 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23223 }
23224 %}
23225 ins_pipe( pipe_slow );
23226 %}
23227
23228 instruct vabsI_reg(vec dst, vec src) %{
23229 match(Set dst (AbsVI src));
23230 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23231 ins_encode %{
23232 uint vlen = Matcher::vector_length(this);
23233 if (vlen <= 4) {
23234 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23235 } else {
23236 int vlen_enc = vector_length_encoding(this);
23237 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23238 }
23239 %}
23240 ins_pipe( pipe_slow );
23241 %}
23242
23243 instruct vabsL_reg(vec dst, vec src) %{
23244 match(Set dst (AbsVL src));
23245 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23246 ins_encode %{
23247 assert(UseAVX > 2, "required");
23248 int vlen_enc = vector_length_encoding(this);
23249 if (!VM_Version::supports_avx512vl()) {
23250 vlen_enc = Assembler::AVX_512bit;
23251 }
23252 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23253 %}
23254 ins_pipe( pipe_slow );
23255 %}
23256
23257 // --------------------------------- ABSNEG --------------------------------------
23258
23259 instruct vabsnegF(vec dst, vec src) %{
23260 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23261 match(Set dst (AbsVF src));
23262 match(Set dst (NegVF src));
23263 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23264 ins_cost(150);
23265 ins_encode %{
23266 int opcode = this->ideal_Opcode();
23267 int vlen = Matcher::vector_length(this);
23268 if (vlen == 2) {
23269 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23270 } else {
23271 assert(vlen == 8 || vlen == 16, "required");
23272 int vlen_enc = vector_length_encoding(this);
23273 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23274 }
23275 %}
23276 ins_pipe( pipe_slow );
23277 %}
23278
23279 instruct vabsneg4F(vec dst) %{
23280 predicate(Matcher::vector_length(n) == 4);
23281 match(Set dst (AbsVF dst));
23282 match(Set dst (NegVF dst));
23283 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23284 ins_cost(150);
23285 ins_encode %{
23286 int opcode = this->ideal_Opcode();
23287 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23288 %}
23289 ins_pipe( pipe_slow );
23290 %}
23291
23292 instruct vabsnegD(vec dst, vec src) %{
23293 match(Set dst (AbsVD src));
23294 match(Set dst (NegVD src));
23295 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23296 ins_encode %{
23297 int opcode = this->ideal_Opcode();
23298 uint vlen = Matcher::vector_length(this);
23299 if (vlen == 2) {
23300 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23301 } else {
23302 int vlen_enc = vector_length_encoding(this);
23303 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23304 }
23305 %}
23306 ins_pipe( pipe_slow );
23307 %}
23308
23309 //------------------------------------- VectorTest --------------------------------------------
23310
23311 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23312 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23313 match(Set cr (VectorTest src1 src2));
23314 effect(TEMP vtmp);
23315 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23316 ins_encode %{
23317 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23318 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23319 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23320 %}
23321 ins_pipe( pipe_slow );
23322 %}
23323
23324 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23325 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23326 match(Set cr (VectorTest src1 src2));
23327 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23328 ins_encode %{
23329 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23330 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23331 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23332 %}
23333 ins_pipe( pipe_slow );
23334 %}
23335
23336 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23337 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23338 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23339 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23340 match(Set cr (VectorTest src1 src2));
23341 effect(TEMP tmp);
23342 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23343 ins_encode %{
23344 uint masklen = Matcher::vector_length(this, $src1);
23345 __ kmovwl($tmp$$Register, $src1$$KRegister);
23346 __ andl($tmp$$Register, (1 << masklen) - 1);
23347 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23348 %}
23349 ins_pipe( pipe_slow );
23350 %}
23351
23352 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23353 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23354 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23355 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23356 match(Set cr (VectorTest src1 src2));
23357 effect(TEMP tmp);
23358 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23359 ins_encode %{
23360 uint masklen = Matcher::vector_length(this, $src1);
23361 __ kmovwl($tmp$$Register, $src1$$KRegister);
23362 __ andl($tmp$$Register, (1 << masklen) - 1);
23363 %}
23364 ins_pipe( pipe_slow );
23365 %}
23366
23367 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23368 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23369 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23370 match(Set cr (VectorTest src1 src2));
23371 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23372 ins_encode %{
23373 uint masklen = Matcher::vector_length(this, $src1);
23374 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23375 %}
23376 ins_pipe( pipe_slow );
23377 %}
23378
23379 //------------------------------------- LoadMask --------------------------------------------
23380
23381 instruct loadMask(legVec dst, legVec src) %{
23382 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23383 match(Set dst (VectorLoadMask src));
23384 effect(TEMP dst);
23385 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23386 ins_encode %{
23387 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23388 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23389 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23390 %}
23391 ins_pipe( pipe_slow );
23392 %}
23393
23394 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23395 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23396 match(Set dst (VectorLoadMask src));
23397 effect(TEMP xtmp);
23398 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23399 ins_encode %{
23400 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23401 true, Assembler::AVX_512bit);
23402 %}
23403 ins_pipe( pipe_slow );
23404 %}
23405
23406 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23407 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23408 match(Set dst (VectorLoadMask src));
23409 effect(TEMP xtmp);
23410 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23411 ins_encode %{
23412 int vlen_enc = vector_length_encoding(in(1));
23413 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23414 false, vlen_enc);
23415 %}
23416 ins_pipe( pipe_slow );
23417 %}
23418
23419 //------------------------------------- StoreMask --------------------------------------------
23420
23421 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23422 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23423 match(Set dst (VectorStoreMask src size));
23424 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23425 ins_encode %{
23426 int vlen = Matcher::vector_length(this);
23427 if (vlen <= 16 && UseAVX <= 2) {
23428 assert(UseSSE >= 3, "required");
23429 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23430 } else {
23431 assert(UseAVX > 0, "required");
23432 int src_vlen_enc = vector_length_encoding(this, $src);
23433 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23434 }
23435 %}
23436 ins_pipe( pipe_slow );
23437 %}
23438
23439 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23440 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23441 match(Set dst (VectorStoreMask src size));
23442 effect(TEMP_DEF dst, TEMP xtmp);
23443 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23444 ins_encode %{
23445 int vlen_enc = Assembler::AVX_128bit;
23446 int vlen = Matcher::vector_length(this);
23447 if (vlen <= 8) {
23448 assert(UseSSE >= 3, "required");
23449 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23450 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23451 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23452 } else {
23453 assert(UseAVX > 0, "required");
23454 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23455 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23456 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23457 }
23458 %}
23459 ins_pipe( pipe_slow );
23460 %}
23461
23462 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23463 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23464 match(Set dst (VectorStoreMask src size));
23465 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23466 effect(TEMP_DEF dst, TEMP xtmp);
23467 ins_encode %{
23468 int vlen_enc = Assembler::AVX_128bit;
23469 int vlen = Matcher::vector_length(this);
23470 if (vlen <= 4) {
23471 assert(UseSSE >= 3, "required");
23472 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23473 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23474 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23475 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23476 } else {
23477 assert(UseAVX > 0, "required");
23478 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23479 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23480 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23481 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23482 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23483 }
23484 %}
23485 ins_pipe( pipe_slow );
23486 %}
23487
23488 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23489 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23490 match(Set dst (VectorStoreMask src size));
23491 effect(TEMP_DEF dst, TEMP xtmp);
23492 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23493 ins_encode %{
23494 assert(UseSSE >= 3, "required");
23495 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23496 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23497 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23498 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23499 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23500 %}
23501 ins_pipe( pipe_slow );
23502 %}
23503
23504 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23505 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23506 match(Set dst (VectorStoreMask src size));
23507 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23508 effect(TEMP_DEF dst, TEMP vtmp);
23509 ins_encode %{
23510 int vlen_enc = Assembler::AVX_128bit;
23511 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23512 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23513 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23514 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23515 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23516 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23517 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23518 %}
23519 ins_pipe( pipe_slow );
23520 %}
23521
23522 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23523 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23524 match(Set dst (VectorStoreMask src size));
23525 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23526 ins_encode %{
23527 int src_vlen_enc = vector_length_encoding(this, $src);
23528 int dst_vlen_enc = vector_length_encoding(this);
23529 if (!VM_Version::supports_avx512vl()) {
23530 src_vlen_enc = Assembler::AVX_512bit;
23531 }
23532 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23533 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23534 %}
23535 ins_pipe( pipe_slow );
23536 %}
23537
23538 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23539 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23540 match(Set dst (VectorStoreMask src size));
23541 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23542 ins_encode %{
23543 int src_vlen_enc = vector_length_encoding(this, $src);
23544 int dst_vlen_enc = vector_length_encoding(this);
23545 if (!VM_Version::supports_avx512vl()) {
23546 src_vlen_enc = Assembler::AVX_512bit;
23547 }
23548 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23549 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23550 %}
23551 ins_pipe( pipe_slow );
23552 %}
23553
23554 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23555 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23556 match(Set dst (VectorStoreMask mask size));
23557 effect(TEMP_DEF dst);
23558 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23559 ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "512-bit vector mask expected");
23561 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23562 false, Assembler::AVX_512bit, noreg);
23563 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23564 %}
23565 ins_pipe( pipe_slow );
23566 %}
23567
23568 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23569 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23570 match(Set dst (VectorStoreMask mask size));
23571 effect(TEMP_DEF dst);
23572 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23573 ins_encode %{
23574 int dst_vlen_enc = vector_length_encoding(this);
23575 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23576 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23577 %}
23578 ins_pipe( pipe_slow );
23579 %}
23580
23581 instruct vmaskcast_evex(kReg dst) %{
23582 match(Set dst (VectorMaskCast dst));
23583 ins_cost(0);
23584 format %{ "vector_mask_cast $dst" %}
23585 ins_encode %{
23586 // empty
23587 %}
23588 ins_pipe(empty);
23589 %}
23590
23591 instruct vmaskcast(vec dst) %{
23592 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23593 match(Set dst (VectorMaskCast dst));
23594 ins_cost(0);
23595 format %{ "vector_mask_cast $dst" %}
23596 ins_encode %{
23597 // empty
23598 %}
23599 ins_pipe(empty);
23600 %}
23601
23602 instruct vmaskcast_avx(vec dst, vec src) %{
23603 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23604 match(Set dst (VectorMaskCast src));
23605 format %{ "vector_mask_cast $dst, $src" %}
23606 ins_encode %{
23607 int vlen = Matcher::vector_length(this);
23608 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23609 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23610 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23611 %}
23612 ins_pipe(pipe_slow);
23613 %}
23614
23615 //-------------------------------- Load Iota Indices ----------------------------------
23616
23617 instruct loadIotaIndices(vec dst, immI_0 src) %{
23618 match(Set dst (VectorLoadConst src));
23619 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23620 ins_encode %{
23621 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23622 BasicType bt = Matcher::vector_element_basic_type(this);
23623 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23624 %}
23625 ins_pipe( pipe_slow );
23626 %}
23627
23628 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23629 match(Set dst (PopulateIndex src1 src2));
23630 effect(TEMP dst, TEMP vtmp);
23631 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23632 ins_encode %{
23633 assert($src2$$constant == 1, "required");
23634 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23635 int vlen_enc = vector_length_encoding(this);
23636 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23637 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23638 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23639 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23640 %}
23641 ins_pipe( pipe_slow );
23642 %}
23643
23644 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23645 match(Set dst (PopulateIndex src1 src2));
23646 effect(TEMP dst, TEMP vtmp);
23647 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23648 ins_encode %{
23649 assert($src2$$constant == 1, "required");
23650 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23651 int vlen_enc = vector_length_encoding(this);
23652 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23653 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23654 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23655 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23656 %}
23657 ins_pipe( pipe_slow );
23658 %}
23659
23660 //-------------------------------- Rearrange ----------------------------------
23661
23662 // LoadShuffle/Rearrange for Byte
23663 instruct rearrangeB(vec dst, vec shuffle) %{
23664 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23665 Matcher::vector_length(n) < 32);
23666 match(Set dst (VectorRearrange dst shuffle));
23667 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23668 ins_encode %{
23669 assert(UseSSE >= 4, "required");
23670 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23671 %}
23672 ins_pipe( pipe_slow );
23673 %}
23674
23675 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23676 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23677 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23678 match(Set dst (VectorRearrange src shuffle));
23679 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23680 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23681 ins_encode %{
23682 assert(UseAVX >= 2, "required");
23683 // Swap src into vtmp1
23684 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23686 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23688 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
23690 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23691 // Perform the blend
23692 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23693 %}
23694 ins_pipe( pipe_slow );
23695 %}
23696
23697
23698 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23699 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23700 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23701 match(Set dst (VectorRearrange src shuffle));
23702 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23703 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23704 ins_encode %{
23705 int vlen_enc = vector_length_encoding(this);
23706 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23707 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23708 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23709 %}
23710 ins_pipe( pipe_slow );
23711 %}
23712
23713 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23714 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23715 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23716 match(Set dst (VectorRearrange src shuffle));
23717 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23718 ins_encode %{
23719 int vlen_enc = vector_length_encoding(this);
23720 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23721 %}
23722 ins_pipe( pipe_slow );
23723 %}
23724
23725 // LoadShuffle/Rearrange for Short
23726
23727 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23728 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23729 !VM_Version::supports_avx512bw());
23730 match(Set dst (VectorLoadShuffle src));
23731 effect(TEMP dst, TEMP vtmp);
23732 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23733 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
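    // Example: short shuffle index k expands to byte indices 2k (low byte)
    // and 2k+1 (high byte), so a short shuffle of [2, 0, ...] becomes the
    // byte shuffle [4, 5, 0, 1, ...].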
23736 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23737 if (UseAVX == 0) {
23738 assert(vlen_in_bytes <= 16, "required");
23739 // Multiply each shuffle by two to get byte index
23740 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23741 __ psllw($vtmp$$XMMRegister, 1);
23742
23743 // Duplicate to create 2 copies of byte index
23744 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23745 __ psllw($dst$$XMMRegister, 8);
23746 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23747
23748 // Add one to get alternate byte index
23749 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23750 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23751 } else {
23752 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23753 int vlen_enc = vector_length_encoding(this);
23754 // Multiply each shuffle by two to get byte index
23755 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23756
23757 // Duplicate to create 2 copies of byte index
23758 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23759 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23760
23761 // Add one to get alternate byte index
23762 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23763 }
23764 %}
23765 ins_pipe( pipe_slow );
23766 %}
23767
23768 instruct rearrangeS(vec dst, vec shuffle) %{
23769 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23770 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23771 match(Set dst (VectorRearrange dst shuffle));
23772 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23773 ins_encode %{
23774 assert(UseSSE >= 4, "required");
23775 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23776 %}
23777 ins_pipe( pipe_slow );
23778 %}
23779
23780 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23781 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23782 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23783 match(Set dst (VectorRearrange src shuffle));
23784 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23785 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23786 ins_encode %{
23787 assert(UseAVX >= 2, "required");
23788 // Swap src into vtmp1
23789 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23791 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23793 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
23795 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23796 // Perform the blend
23797 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23798 %}
23799 ins_pipe( pipe_slow );
23800 %}
23801
23802 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23803 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23804 VM_Version::supports_avx512bw());
23805 match(Set dst (VectorRearrange src shuffle));
23806 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23807 ins_encode %{
23808 int vlen_enc = vector_length_encoding(this);
23809 if (!VM_Version::supports_avx512vl()) {
23810 vlen_enc = Assembler::AVX_512bit;
23811 }
23812 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23813 %}
23814 ins_pipe( pipe_slow );
23815 %}
23816
23817 // LoadShuffle/Rearrange for Integer and Float
23818
23819 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23820 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23821 Matcher::vector_length(n) == 4 && UseAVX == 0);
23822 match(Set dst (VectorLoadShuffle src));
23823 effect(TEMP dst, TEMP vtmp);
23824 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23825 ins_encode %{
23826 assert(UseSSE >= 4, "required");
23827
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
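    // Example: int shuffle index k expands to byte indices 4k .. 4k+3, so
    // an int shuffle of [1, 0, ...] becomes [4, 5, 6, 7, 0, 1, 2, 3, ...].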
23830
23831 // Duplicate and multiply each shuffle by 4
23832 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23833 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23834 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23835 __ psllw($vtmp$$XMMRegister, 2);
23836
23837 // Duplicate again to create 4 copies of byte index
23838 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23839 __ psllw($dst$$XMMRegister, 8);
23840 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23841
23842 // Add 3,2,1,0 to get alternate byte index
23843 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23844 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23845 %}
23846 ins_pipe( pipe_slow );
23847 %}
23848
23849 instruct rearrangeI(vec dst, vec shuffle) %{
23850 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23851 UseAVX == 0);
23852 match(Set dst (VectorRearrange dst shuffle));
23853 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23854 ins_encode %{
23855 assert(UseSSE >= 4, "required");
23856 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23857 %}
23858 ins_pipe( pipe_slow );
23859 %}
23860
23861 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23862 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23863 UseAVX > 0);
23864 match(Set dst (VectorRearrange src shuffle));
23865 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23866 ins_encode %{
23867 int vlen_enc = vector_length_encoding(this);
23868 BasicType bt = Matcher::vector_element_basic_type(this);
23869 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23870 %}
23871 ins_pipe( pipe_slow );
23872 %}
23873
23874 // LoadShuffle/Rearrange for Long and Double
23875
23876 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23877 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23878 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23879 match(Set dst (VectorLoadShuffle src));
23880 effect(TEMP dst, TEMP vtmp);
23881 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23882 ins_encode %{
23883 assert(UseAVX >= 2, "required");
23884
23885 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since
    // only a double word shuffle instruction is available on these platforms.
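    // Example: long shuffle index k expands to double word indices 2k and
    // 2k+1, so a long shuffle of [1, 0] becomes [2, 3, 0, 1].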
23888
23889 // Multiply each shuffle by two to get double word index
23890 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23891
23892 // Duplicate each double word shuffle
23893 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23894 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23895
23896 // Add one to get alternate double word index
23897 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23898 %}
23899 ins_pipe( pipe_slow );
23900 %}
23901
23902 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23903 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23904 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23905 match(Set dst (VectorRearrange src shuffle));
23906 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23907 ins_encode %{
23908 assert(UseAVX >= 2, "required");
23909
23910 int vlen_enc = vector_length_encoding(this);
23911 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23912 %}
23913 ins_pipe( pipe_slow );
23914 %}
23915
23916 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23917 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23918 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23919 match(Set dst (VectorRearrange src shuffle));
23920 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23921 ins_encode %{
23922 assert(UseAVX > 2, "required");
23923
23924 int vlen_enc = vector_length_encoding(this);
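    // vpermq with a shuffle operand in a vector register has no 128-bit form,
    // so widen the encoding; the unused upper lanes are ignored.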
23925 if (vlen_enc == Assembler::AVX_128bit) {
23926 vlen_enc = Assembler::AVX_256bit;
23927 }
23928 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23929 %}
23930 ins_pipe( pipe_slow );
23931 %}
23932
23933 // --------------------------------- FMA --------------------------------------
23934 // a * b + c
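// Each lane computes a*b + c with a single rounding step, which separate
// multiply and add instructions cannot reproduce.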
23935
23936 instruct vfmaF_reg(vec a, vec b, vec c) %{
23937 match(Set c (FmaVF c (Binary a b)));
23938 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23939 ins_cost(150);
23940 ins_encode %{
23941 assert(UseFMA, "not enabled");
23942 int vlen_enc = vector_length_encoding(this);
23943 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23944 %}
23945 ins_pipe( pipe_slow );
23946 %}
23947
23948 instruct vfmaF_mem(vec a, memory b, vec c) %{
23949 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23950 match(Set c (FmaVF c (Binary a (LoadVector b))));
23951 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23952 ins_cost(150);
23953 ins_encode %{
23954 assert(UseFMA, "not enabled");
23955 int vlen_enc = vector_length_encoding(this);
23956 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23957 %}
23958 ins_pipe( pipe_slow );
23959 %}
23960
23961 instruct vfmaD_reg(vec a, vec b, vec c) %{
23962 match(Set c (FmaVD c (Binary a b)));
23963 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23964 ins_cost(150);
23965 ins_encode %{
23966 assert(UseFMA, "not enabled");
23967 int vlen_enc = vector_length_encoding(this);
23968 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23969 %}
23970 ins_pipe( pipe_slow );
23971 %}
23972
23973 instruct vfmaD_mem(vec a, memory b, vec c) %{
23974 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23975 match(Set c (FmaVD c (Binary a (LoadVector b))));
23976 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23977 ins_cost(150);
23978 ins_encode %{
23979 assert(UseFMA, "not enabled");
23980 int vlen_enc = vector_length_encoding(this);
23981 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23982 %}
23983 ins_pipe( pipe_slow );
23984 %}
23985
23986 // --------------------------------- Vector Multiply Add --------------------------------------
23987
23988 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23989 predicate(UseAVX == 0);
23990 match(Set dst (MulAddVS2VI dst src1));
23991 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23992 ins_encode %{
23993 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23994 %}
23995 ins_pipe( pipe_slow );
23996 %}
23997
23998 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23999 predicate(UseAVX > 0);
24000 match(Set dst (MulAddVS2VI src1 src2));
24001 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
24002 ins_encode %{
24003 int vlen_enc = vector_length_encoding(this);
24004 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24005 %}
24006 ins_pipe( pipe_slow );
24007 %}
24008
24009 // --------------------------------- Vector Multiply Add Add ----------------------------------
24010
24011 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
24012 predicate(VM_Version::supports_avx512_vnni());
24013 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
24014 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
24015 ins_encode %{
24016 assert(UseAVX > 2, "required");
24017 int vlen_enc = vector_length_encoding(this);
24018 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24019 %}
  ins_cost(10);
  ins_pipe( pipe_slow );
24022 %}
24023
24024 // --------------------------------- PopCount --------------------------------------
24025
24026 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
24027 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24028 match(Set dst (PopCountVI src));
24029 match(Set dst (PopCountVL src));
24030 format %{ "vector_popcount_integral $dst, $src" %}
24031 ins_encode %{
24033 int vlen_enc = vector_length_encoding(this, $src);
24034 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
24036 %}
24037 ins_pipe( pipe_slow );
24038 %}
24039
24040 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
24041 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24042 match(Set dst (PopCountVI src mask));
24043 match(Set dst (PopCountVL src mask));
24044 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
24045 ins_encode %{
24046 int vlen_enc = vector_length_encoding(this, $src);
24047 BasicType bt = Matcher::vector_element_basic_type(this, $src);
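    // Masked lanes must merge with the source: preload dst with src so that
    // lanes with a clear mask bit keep their original value.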
24048 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24049 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
24050 %}
24051 ins_pipe( pipe_slow );
24052 %}
24053
24054 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
24055 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
24056 match(Set dst (PopCountVI src));
24057 match(Set dst (PopCountVL src));
24058 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24059 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
24060 ins_encode %{
24062 int vlen_enc = vector_length_encoding(this, $src);
24063 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24064 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24065 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
24066 %}
24067 ins_pipe( pipe_slow );
24068 %}
24069
24070 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24071
24072 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
24073 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24074 Matcher::vector_length_in_bytes(n->in(1))));
24075 match(Set dst (CountTrailingZerosV src));
24076 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
24077 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
24079 ins_encode %{
24080 int vlen_enc = vector_length_encoding(this, $src);
24081 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24082 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24083 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24084 %}
24085 ins_pipe( pipe_slow );
24086 %}
24087
24088 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24089 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24090 VM_Version::supports_avx512cd() &&
24091 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24092 match(Set dst (CountTrailingZerosV src));
24093 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24094 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24096 ins_encode %{
24097 int vlen_enc = vector_length_encoding(this, $src);
24098 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24099 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24100 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
24101 %}
24102 ins_pipe( pipe_slow );
24103 %}
24104
24105 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
24106 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24107 match(Set dst (CountTrailingZerosV src));
24108 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
24109 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
24111 ins_encode %{
24112 int vlen_enc = vector_length_encoding(this, $src);
24113 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24114 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24115 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
24116 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
24117 %}
24118 ins_pipe( pipe_slow );
24119 %}
24120
24121 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24122 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24123 match(Set dst (CountTrailingZerosV src));
24124 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24125 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24126 ins_encode %{
24127 int vlen_enc = vector_length_encoding(this, $src);
24128 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24129 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24130 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24131 %}
24132 ins_pipe( pipe_slow );
24133 %}
24134
24135
24136 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24137
24138 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
24139 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
24140 effect(TEMP dst);
24141 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24142 ins_encode %{
24143 int vector_len = vector_length_encoding(this);
24144 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
24145 %}
24146 ins_pipe( pipe_slow );
24147 %}
24148
24149 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
24150 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
24151 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
24152 effect(TEMP dst);
24153 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
24154 ins_encode %{
24155 int vector_len = vector_length_encoding(this);
24156 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
24157 %}
24158 ins_pipe( pipe_slow );
24159 %}
24160
24161 // --------------------------------- Rotation Operations ----------------------------------
24162 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
24163 match(Set dst (RotateLeftV src shift));
24164 match(Set dst (RotateRightV src shift));
24165 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
24166 ins_encode %{
24167 int opcode = this->ideal_Opcode();
24168 int vector_len = vector_length_encoding(this);
24169 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24170 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
24171 %}
24172 ins_pipe( pipe_slow );
24173 %}
24174
instruct vprotate_var(vec dst, vec src, vec shift) %{
24176 match(Set dst (RotateLeftV src shift));
24177 match(Set dst (RotateRightV src shift));
24178 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
24179 ins_encode %{
24180 int opcode = this->ideal_Opcode();
24181 int vector_len = vector_length_encoding(this);
24182 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
24183 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
24184 %}
24185 ins_pipe( pipe_slow );
24186 %}
24187
24188 // ---------------------------------- Masked Operations ------------------------------------
24189 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24190 predicate(!n->in(3)->bottom_type()->isa_vectmask());
24191 match(Set dst (LoadVectorMasked mem mask));
24192 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24193 ins_encode %{
24194 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24195 int vlen_enc = vector_length_encoding(this);
24196 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24197 %}
24198 ins_pipe( pipe_slow );
24199 %}
24200
24201
24202 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24203 predicate(n->in(3)->bottom_type()->isa_vectmask());
24204 match(Set dst (LoadVectorMasked mem mask));
24205 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24206 ins_encode %{
24207 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24208 int vector_len = vector_length_encoding(this);
24209 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24210 %}
24211 ins_pipe( pipe_slow );
24212 %}
24213
24214 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24215 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24216 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24217 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24218 ins_encode %{
24219 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24220 int vlen_enc = vector_length_encoding(src_node);
24221 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24222 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24223 %}
24224 ins_pipe( pipe_slow );
24225 %}
24226
24227 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24228 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24229 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24230 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24231 ins_encode %{
24232 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24233 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24234 int vlen_enc = vector_length_encoding(src_node);
24235 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24236 %}
24237 ins_pipe( pipe_slow );
24238 %}
24239
24240 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
24241 match(Set addr (VerifyVectorAlignment addr mask));
24242 effect(KILL cr);
24243 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
24244 ins_encode %{
24245 Label Lskip;
24246 // check if masked bits of addr are zero
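    // e.g. mask == 63 asserts 64-byte (full ZMM) alignment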
24247 __ testq($addr$$Register, $mask$$constant);
24248 __ jccb(Assembler::equal, Lskip);
24249 __ stop("verify_vector_alignment found a misaligned vector memory access");
24250 __ bind(Lskip);
24251 %}
24252 ins_pipe(pipe_slow);
24253 %}
24254
24255 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
24256 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
24257 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
24258 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
24259 ins_encode %{
24260 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
24261 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
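    // Result protocol: -1 when every lane selected by $mask compares equal,
    // otherwise the index of the first mismatching lane. Lanes outside the
    // mask (collected via ~$mask in $ktmp2) count as equal.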
24262
24263 Label DONE;
24264 int vlen_enc = vector_length_encoding(this, $src1);
24265 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
24266
24267 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
24268 __ mov64($dst$$Register, -1L);
24269 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
24270 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
24271 __ jccb(Assembler::carrySet, DONE);
24272 __ kmovql($dst$$Register, $ktmp1$$KRegister);
24273 __ notq($dst$$Register);
24274 __ tzcntq($dst$$Register, $dst$$Register);
24275 __ bind(DONE);
24276 %}
24277 ins_pipe( pipe_slow );
24278 %}
24279
24280
24281 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24282 match(Set dst (VectorMaskGen len));
24283 effect(TEMP temp, KILL cr);
24284 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24285 ins_encode %{
24286 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24287 %}
24288 ins_pipe( pipe_slow );
24289 %}
24290
24291 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24292 match(Set dst (VectorMaskGen len));
24293 format %{ "vector_mask_gen $len \t! vector mask generator" %}
24294 effect(TEMP temp);
24295 ins_encode %{
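    // Materialize a mask with the low $len bits set; e.g. len == 5 yields 0b11111.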
24296 if ($len$$constant > 0) {
24297 __ mov64($temp$$Register, right_n_bits($len$$constant));
24298 __ kmovql($dst$$KRegister, $temp$$Register);
24299 } else {
24300 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
24301 }
24302 %}
24303 ins_pipe( pipe_slow );
24304 %}
24305
24306 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
24307 predicate(n->in(1)->bottom_type()->isa_vectmask());
24308 match(Set dst (VectorMaskToLong mask));
24309 effect(TEMP dst, KILL cr);
24310 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
24311 ins_encode %{
24312 int opcode = this->ideal_Opcode();
24313 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24314 int mask_len = Matcher::vector_length(this, $mask);
24315 int mask_size = mask_len * type2aelembytes(mbt);
24316 int vlen_enc = vector_length_encoding(this, $mask);
24317 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24318 $dst$$Register, mask_len, mask_size, vlen_enc);
24319 %}
24320 ins_pipe( pipe_slow );
24321 %}
24322
24323 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24324 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24325 match(Set dst (VectorMaskToLong mask));
24326 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24327 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24328 ins_encode %{
24329 int opcode = this->ideal_Opcode();
24330 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24331 int mask_len = Matcher::vector_length(this, $mask);
24332 int vlen_enc = vector_length_encoding(this, $mask);
24333 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24334 $dst$$Register, mask_len, mbt, vlen_enc);
24335 %}
24336 ins_pipe( pipe_slow );
24337 %}
24338
24339 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24340 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24341 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24342 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24343 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24344 ins_encode %{
24345 int opcode = this->ideal_Opcode();
24346 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24347 int mask_len = Matcher::vector_length(this, $mask);
24348 int vlen_enc = vector_length_encoding(this, $mask);
24349 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24350 $dst$$Register, mask_len, mbt, vlen_enc);
24351 %}
24352 ins_pipe( pipe_slow );
24353 %}
24354
24355 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24356 predicate(n->in(1)->bottom_type()->isa_vectmask());
24357 match(Set dst (VectorMaskTrueCount mask));
24358 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24359 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24360 ins_encode %{
24361 int opcode = this->ideal_Opcode();
24362 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24363 int mask_len = Matcher::vector_length(this, $mask);
24364 int mask_size = mask_len * type2aelembytes(mbt);
24365 int vlen_enc = vector_length_encoding(this, $mask);
24366 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24367 $tmp$$Register, mask_len, mask_size, vlen_enc);
24368 %}
24369 ins_pipe( pipe_slow );
24370 %}
24371
24372 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24373 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24374 match(Set dst (VectorMaskTrueCount mask));
24375 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24376 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24377 ins_encode %{
24378 int opcode = this->ideal_Opcode();
24379 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24380 int mask_len = Matcher::vector_length(this, $mask);
24381 int vlen_enc = vector_length_encoding(this, $mask);
24382 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24383 $tmp$$Register, mask_len, mbt, vlen_enc);
24384 %}
24385 ins_pipe( pipe_slow );
24386 %}
24387
24388 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24389 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24390 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24391 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24392 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24393 ins_encode %{
24394 int opcode = this->ideal_Opcode();
24395 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24396 int mask_len = Matcher::vector_length(this, $mask);
24397 int vlen_enc = vector_length_encoding(this, $mask);
24398 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24399 $tmp$$Register, mask_len, mbt, vlen_enc);
24400 %}
24401 ins_pipe( pipe_slow );
24402 %}
24403
24404 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24405 predicate(n->in(1)->bottom_type()->isa_vectmask());
24406 match(Set dst (VectorMaskFirstTrue mask));
24407 match(Set dst (VectorMaskLastTrue mask));
24408 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24409 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24410 ins_encode %{
24411 int opcode = this->ideal_Opcode();
24412 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24413 int mask_len = Matcher::vector_length(this, $mask);
24414 int mask_size = mask_len * type2aelembytes(mbt);
24415 int vlen_enc = vector_length_encoding(this, $mask);
24416 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24417 $tmp$$Register, mask_len, mask_size, vlen_enc);
24418 %}
24419 ins_pipe( pipe_slow );
24420 %}
24421
24422 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24423 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24424 match(Set dst (VectorMaskFirstTrue mask));
24425 match(Set dst (VectorMaskLastTrue mask));
24426 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24427 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24428 ins_encode %{
24429 int opcode = this->ideal_Opcode();
24430 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24431 int mask_len = Matcher::vector_length(this, $mask);
24432 int vlen_enc = vector_length_encoding(this, $mask);
24433 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24434 $tmp$$Register, mask_len, mbt, vlen_enc);
24435 %}
24436 ins_pipe( pipe_slow );
24437 %}
24438
24439 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24440 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24441 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24442 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24443 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24444 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24445 ins_encode %{
24446 int opcode = this->ideal_Opcode();
24447 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24448 int mask_len = Matcher::vector_length(this, $mask);
24449 int vlen_enc = vector_length_encoding(this, $mask);
24450 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24451 $tmp$$Register, mask_len, mbt, vlen_enc);
24452 %}
24453 ins_pipe( pipe_slow );
24454 %}
24455
24456 // --------------------------------- Compress/Expand Operations ---------------------------
24457 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24458 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24459 match(Set dst (CompressV src mask));
24460 match(Set dst (ExpandV src mask));
24461 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24463 ins_encode %{
24464 int opcode = this->ideal_Opcode();
24465 int vlen_enc = vector_length_encoding(this);
24466 BasicType bt = Matcher::vector_element_basic_type(this);
24467 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24468 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24469 %}
24470 ins_pipe( pipe_slow );
24471 %}
24472
24473 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24474 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24475 match(Set dst (CompressV src mask));
24476 match(Set dst (ExpandV src mask));
24477 format %{ "vector_compress_expand $dst, $src, $mask" %}
24478 ins_encode %{
24479 int opcode = this->ideal_Opcode();
24480 int vector_len = vector_length_encoding(this);
24481 BasicType bt = Matcher::vector_element_basic_type(this);
24482 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24483 %}
24484 ins_pipe( pipe_slow );
24485 %}
24486
24487 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24488 match(Set dst (CompressM mask));
24489 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24490 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24491 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask type");
24493 int mask_len = Matcher::vector_length(this);
24494 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24495 %}
24496 ins_pipe( pipe_slow );
24497 %}
24498
24499 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24500
24501 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24502 predicate(!VM_Version::supports_gfni());
24503 match(Set dst (ReverseV src));
24504 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24506 ins_encode %{
24507 int vec_enc = vector_length_encoding(this);
24508 BasicType bt = Matcher::vector_element_basic_type(this);
24509 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24510 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24511 %}
24512 ins_pipe( pipe_slow );
24513 %}
24514
24515 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24516 predicate(VM_Version::supports_gfni());
24517 match(Set dst (ReverseV src));
24518 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24520 ins_encode %{
24521 int vec_enc = vector_length_encoding(this);
24522 BasicType bt = Matcher::vector_element_basic_type(this);
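    // 0x8040201008040201 is the bit-reversal matrix: the GF(2^8) affine
    // transform with this constant reverses the bit order within each byte.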
24523 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24524 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24525 $xtmp$$XMMRegister);
24526 %}
24527 ins_pipe( pipe_slow );
24528 %}
24529
24530 instruct vreverse_byte_reg(vec dst, vec src) %{
24531 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24532 match(Set dst (ReverseBytesV src));
24533 effect(TEMP dst);
24534 format %{ "vector_reverse_byte $dst, $src" %}
24535 ins_encode %{
24536 int vec_enc = vector_length_encoding(this);
24537 BasicType bt = Matcher::vector_element_basic_type(this);
24538 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24539 %}
24540 ins_pipe( pipe_slow );
24541 %}
24542
24543 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24544 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24545 match(Set dst (ReverseBytesV src));
24546 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24548 ins_encode %{
24549 int vec_enc = vector_length_encoding(this);
24550 BasicType bt = Matcher::vector_element_basic_type(this);
24551 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24552 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24553 %}
24554 ins_pipe( pipe_slow );
24555 %}
24556
24557 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24558
24559 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24560 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24561 Matcher::vector_length_in_bytes(n->in(1))));
24562 match(Set dst (CountLeadingZerosV src));
24563 format %{ "vector_count_leading_zeros $dst, $src" %}
24564 ins_encode %{
24565 int vlen_enc = vector_length_encoding(this, $src);
24566 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24567 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24568 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24569 %}
24570 ins_pipe( pipe_slow );
24571 %}
24572
24573 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24574 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24575 Matcher::vector_length_in_bytes(n->in(1))));
24576 match(Set dst (CountLeadingZerosV src mask));
24577 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24578 ins_encode %{
24579 int vlen_enc = vector_length_encoding(this, $src);
24580 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24581 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24582 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24583 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24584 %}
24585 ins_pipe( pipe_slow );
24586 %}
24587
24588 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24589 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24590 VM_Version::supports_avx512cd() &&
24591 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24592 match(Set dst (CountLeadingZerosV src));
24593 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24595 ins_encode %{
24596 int vlen_enc = vector_length_encoding(this, $src);
24597 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24598 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24599 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24600 %}
24601 ins_pipe( pipe_slow );
24602 %}
24603
24604 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24605 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24606 match(Set dst (CountLeadingZerosV src));
24607 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24609 ins_encode %{
24610 int vlen_enc = vector_length_encoding(this, $src);
24611 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24612 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24613 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24614 $rtmp$$Register, true, vlen_enc);
24615 %}
24616 ins_pipe( pipe_slow );
24617 %}
24618
24619 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24620 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24621 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24622 match(Set dst (CountLeadingZerosV src));
24623 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24624 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24625 ins_encode %{
24626 int vlen_enc = vector_length_encoding(this, $src);
24627 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24628 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24629 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24630 %}
24631 ins_pipe( pipe_slow );
24632 %}
24633
24634 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24635 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24636 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24637 match(Set dst (CountLeadingZerosV src));
24638 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24639 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24640 ins_encode %{
24641 int vlen_enc = vector_length_encoding(this, $src);
24642 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24643 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24644 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24645 %}
24646 ins_pipe( pipe_slow );
24647 %}
24648
24649 // ---------------------------------- Vector Masked Operations ------------------------------------
24650
24651 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24652 match(Set dst (AddVB (Binary dst src2) mask));
24653 match(Set dst (AddVS (Binary dst src2) mask));
24654 match(Set dst (AddVI (Binary dst src2) mask));
24655 match(Set dst (AddVL (Binary dst src2) mask));
24656 match(Set dst (AddVF (Binary dst src2) mask));
24657 match(Set dst (AddVD (Binary dst src2) mask));
24658 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24659 ins_encode %{
24660 int vlen_enc = vector_length_encoding(this);
24661 BasicType bt = Matcher::vector_element_basic_type(this);
24662 int opc = this->ideal_Opcode();
24663 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24664 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24665 %}
24666 ins_pipe( pipe_slow );
24667 %}
24668
24669 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24670 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24671 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24672 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24673 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24674 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24675 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24676 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24677 ins_encode %{
24678 int vlen_enc = vector_length_encoding(this);
24679 BasicType bt = Matcher::vector_element_basic_type(this);
24680 int opc = this->ideal_Opcode();
24681 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24682 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24683 %}
24684 ins_pipe( pipe_slow );
24685 %}
24686
24687 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24688 match(Set dst (XorV (Binary dst src2) mask));
24689 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24690 ins_encode %{
24691 int vlen_enc = vector_length_encoding(this);
24692 BasicType bt = Matcher::vector_element_basic_type(this);
24693 int opc = this->ideal_Opcode();
24694 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24695 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24696 %}
24697 ins_pipe( pipe_slow );
24698 %}
24699
24700 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24701 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24702 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24703 ins_encode %{
24704 int vlen_enc = vector_length_encoding(this);
24705 BasicType bt = Matcher::vector_element_basic_type(this);
24706 int opc = this->ideal_Opcode();
24707 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24708 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24709 %}
24710 ins_pipe( pipe_slow );
24711 %}
24712
24713 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24714 match(Set dst (OrV (Binary dst src2) mask));
24715 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24716 ins_encode %{
24717 int vlen_enc = vector_length_encoding(this);
24718 BasicType bt = Matcher::vector_element_basic_type(this);
24719 int opc = this->ideal_Opcode();
24720 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24721 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24722 %}
24723 ins_pipe( pipe_slow );
24724 %}
24725
24726 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24727 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24728 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24729 ins_encode %{
24730 int vlen_enc = vector_length_encoding(this);
24731 BasicType bt = Matcher::vector_element_basic_type(this);
24732 int opc = this->ideal_Opcode();
24733 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24734 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24735 %}
24736 ins_pipe( pipe_slow );
24737 %}
24738
24739 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24740 match(Set dst (AndV (Binary dst src2) mask));
24741 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24742 ins_encode %{
24743 int vlen_enc = vector_length_encoding(this);
24744 BasicType bt = Matcher::vector_element_basic_type(this);
24745 int opc = this->ideal_Opcode();
24746 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24747 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24748 %}
24749 ins_pipe( pipe_slow );
24750 %}
24751
24752 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24753 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24754 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24755 ins_encode %{
24756 int vlen_enc = vector_length_encoding(this);
24757 BasicType bt = Matcher::vector_element_basic_type(this);
24758 int opc = this->ideal_Opcode();
24759 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24760 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24761 %}
24762 ins_pipe( pipe_slow );
24763 %}
24764
24765 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24766 match(Set dst (SubVB (Binary dst src2) mask));
24767 match(Set dst (SubVS (Binary dst src2) mask));
24768 match(Set dst (SubVI (Binary dst src2) mask));
24769 match(Set dst (SubVL (Binary dst src2) mask));
24770 match(Set dst (SubVF (Binary dst src2) mask));
24771 match(Set dst (SubVD (Binary dst src2) mask));
24772 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24773 ins_encode %{
24774 int vlen_enc = vector_length_encoding(this);
24775 BasicType bt = Matcher::vector_element_basic_type(this);
24776 int opc = this->ideal_Opcode();
24777 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24778 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24779 %}
24780 ins_pipe( pipe_slow );
24781 %}
24782
24783 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24784 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24785 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24786 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24787 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24788 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24789 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24790 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24791 ins_encode %{
24792 int vlen_enc = vector_length_encoding(this);
24793 BasicType bt = Matcher::vector_element_basic_type(this);
24794 int opc = this->ideal_Opcode();
24795 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24796 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24797 %}
24798 ins_pipe( pipe_slow );
24799 %}
24800
24801 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24802 match(Set dst (MulVS (Binary dst src2) mask));
24803 match(Set dst (MulVI (Binary dst src2) mask));
24804 match(Set dst (MulVL (Binary dst src2) mask));
24805 match(Set dst (MulVF (Binary dst src2) mask));
24806 match(Set dst (MulVD (Binary dst src2) mask));
24807 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24808 ins_encode %{
24809 int vlen_enc = vector_length_encoding(this);
24810 BasicType bt = Matcher::vector_element_basic_type(this);
24811 int opc = this->ideal_Opcode();
24812 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24813 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24814 %}
24815 ins_pipe( pipe_slow );
24816 %}
24817
24818 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24819 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24820 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24821 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24822 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24823 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24824 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24825 ins_encode %{
24826 int vlen_enc = vector_length_encoding(this);
24827 BasicType bt = Matcher::vector_element_basic_type(this);
24828 int opc = this->ideal_Opcode();
24829 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24830 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24831 %}
24832 ins_pipe( pipe_slow );
24833 %}
24834
24835 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24836 match(Set dst (SqrtVF dst mask));
24837 match(Set dst (SqrtVD dst mask));
24838 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24839 ins_encode %{
24840 int vlen_enc = vector_length_encoding(this);
24841 BasicType bt = Matcher::vector_element_basic_type(this);
24842 int opc = this->ideal_Opcode();
24843 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24844 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24845 %}
24846 ins_pipe( pipe_slow );
24847 %}
24848
24849 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24850 match(Set dst (DivVF (Binary dst src2) mask));
24851 match(Set dst (DivVD (Binary dst src2) mask));
24852 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24853 ins_encode %{
24854 int vlen_enc = vector_length_encoding(this);
24855 BasicType bt = Matcher::vector_element_basic_type(this);
24856 int opc = this->ideal_Opcode();
24857 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24858 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24859 %}
24860 ins_pipe( pipe_slow );
24861 %}
24862
24863 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24864 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24865 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24866 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24867 ins_encode %{
24868 int vlen_enc = vector_length_encoding(this);
24869 BasicType bt = Matcher::vector_element_basic_type(this);
24870 int opc = this->ideal_Opcode();
24871 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24872 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24873 %}
24874 ins_pipe( pipe_slow );
24875 %}
24876
24877
24878 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24879 match(Set dst (RotateLeftV (Binary dst shift) mask));
24880 match(Set dst (RotateRightV (Binary dst shift) mask));
24881 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24882 ins_encode %{
24883 int vlen_enc = vector_length_encoding(this);
24884 BasicType bt = Matcher::vector_element_basic_type(this);
24885 int opc = this->ideal_Opcode();
24886 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24887 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24888 %}
24889 ins_pipe( pipe_slow );
24890 %}
24891
24892 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24893 match(Set dst (RotateLeftV (Binary dst src2) mask));
24894 match(Set dst (RotateRightV (Binary dst src2) mask));
24895 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24896 ins_encode %{
24897 int vlen_enc = vector_length_encoding(this);
24898 BasicType bt = Matcher::vector_element_basic_type(this);
24899 int opc = this->ideal_Opcode();
24900 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24901 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24902 %}
24903 ins_pipe( pipe_slow );
24904 %}
24905
24906 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24907 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24908 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24909 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24910 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24911 ins_encode %{
24912 int vlen_enc = vector_length_encoding(this);
24913 BasicType bt = Matcher::vector_element_basic_type(this);
24914 int opc = this->ideal_Opcode();
24915 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24916 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24917 %}
24918 ins_pipe( pipe_slow );
24919 %}
24920
24921 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24922 predicate(!n->as_ShiftV()->is_var_shift());
24923 match(Set dst (LShiftVS (Binary dst src2) mask));
24924 match(Set dst (LShiftVI (Binary dst src2) mask));
24925 match(Set dst (LShiftVL (Binary dst src2) mask));
24926 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24927 ins_encode %{
24928 int vlen_enc = vector_length_encoding(this);
24929 BasicType bt = Matcher::vector_element_basic_type(this);
24930 int opc = this->ideal_Opcode();
24931 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24932 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24933 %}
24934 ins_pipe( pipe_slow );
24935 %}
24936
24937 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24938 predicate(n->as_ShiftV()->is_var_shift());
24939 match(Set dst (LShiftVS (Binary dst src2) mask));
24940 match(Set dst (LShiftVI (Binary dst src2) mask));
24941 match(Set dst (LShiftVL (Binary dst src2) mask));
24942 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24943 ins_encode %{
24944 int vlen_enc = vector_length_encoding(this);
24945 BasicType bt = Matcher::vector_element_basic_type(this);
24946 int opc = this->ideal_Opcode();
24947 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24948 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24949 %}
24950 ins_pipe( pipe_slow );
24951 %}
24952
24953 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24954 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24955 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24956 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24957 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24958 ins_encode %{
24959 int vlen_enc = vector_length_encoding(this);
24960 BasicType bt = Matcher::vector_element_basic_type(this);
24961 int opc = this->ideal_Opcode();
24962 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24963 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24964 %}
24965 ins_pipe( pipe_slow );
24966 %}
24967
24968 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24969 predicate(!n->as_ShiftV()->is_var_shift());
24970 match(Set dst (RShiftVS (Binary dst src2) mask));
24971 match(Set dst (RShiftVI (Binary dst src2) mask));
24972 match(Set dst (RShiftVL (Binary dst src2) mask));
24973 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24974 ins_encode %{
24975 int vlen_enc = vector_length_encoding(this);
24976 BasicType bt = Matcher::vector_element_basic_type(this);
24977 int opc = this->ideal_Opcode();
24978 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24979 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24980 %}
24981 ins_pipe( pipe_slow );
24982 %}
24983
24984 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24985 predicate(n->as_ShiftV()->is_var_shift());
24986 match(Set dst (RShiftVS (Binary dst src2) mask));
24987 match(Set dst (RShiftVI (Binary dst src2) mask));
24988 match(Set dst (RShiftVL (Binary dst src2) mask));
24989 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24990 ins_encode %{
24991 int vlen_enc = vector_length_encoding(this);
24992 BasicType bt = Matcher::vector_element_basic_type(this);
24993 int opc = this->ideal_Opcode();
24994 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24995 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24996 %}
24997 ins_pipe( pipe_slow );
24998 %}
24999
25000 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
25001 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
25002 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
25003 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
25004 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
25005 ins_encode %{
25006 int vlen_enc = vector_length_encoding(this);
25007 BasicType bt = Matcher::vector_element_basic_type(this);
25008 int opc = this->ideal_Opcode();
25009 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25010 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
25011 %}
25012 ins_pipe( pipe_slow );
25013 %}
25014
25015 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
25016 predicate(!n->as_ShiftV()->is_var_shift());
25017 match(Set dst (URShiftVS (Binary dst src2) mask));
25018 match(Set dst (URShiftVI (Binary dst src2) mask));
25019 match(Set dst (URShiftVL (Binary dst src2) mask));
25020 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25021 ins_encode %{
25022 int vlen_enc = vector_length_encoding(this);
25023 BasicType bt = Matcher::vector_element_basic_type(this);
25024 int opc = this->ideal_Opcode();
25025 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25026 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
25027 %}
25028 ins_pipe( pipe_slow );
25029 %}
25030
25031 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
25032 predicate(n->as_ShiftV()->is_var_shift());
25033 match(Set dst (URShiftVS (Binary dst src2) mask));
25034 match(Set dst (URShiftVI (Binary dst src2) mask));
25035 match(Set dst (URShiftVL (Binary dst src2) mask));
25036 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
25037 ins_encode %{
25038 int vlen_enc = vector_length_encoding(this);
25039 BasicType bt = Matcher::vector_element_basic_type(this);
25040 int opc = this->ideal_Opcode();
25041 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25042 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
25043 %}
25044 ins_pipe( pipe_slow );
25045 %}
25046
25047 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
25048 match(Set dst (MaxV (Binary dst src2) mask));
25049 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25050 ins_encode %{
25051 int vlen_enc = vector_length_encoding(this);
25052 BasicType bt = Matcher::vector_element_basic_type(this);
25053 int opc = this->ideal_Opcode();
25054 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25055 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25056 %}
25057 ins_pipe( pipe_slow );
25058 %}
25059
25060 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
25061 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
25062 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
25063 ins_encode %{
25064 int vlen_enc = vector_length_encoding(this);
25065 BasicType bt = Matcher::vector_element_basic_type(this);
25066 int opc = this->ideal_Opcode();
25067 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25068 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25069 %}
25070 ins_pipe( pipe_slow );
25071 %}
25072
25073 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
25074 match(Set dst (MinV (Binary dst src2) mask));
25075 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25076 ins_encode %{
25077 int vlen_enc = vector_length_encoding(this);
25078 BasicType bt = Matcher::vector_element_basic_type(this);
25079 int opc = this->ideal_Opcode();
25080 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25081 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25082 %}
25083 ins_pipe( pipe_slow );
25084 %}
25085
25086 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
25087 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
25088 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
25089 ins_encode %{
25090 int vlen_enc = vector_length_encoding(this);
25091 BasicType bt = Matcher::vector_element_basic_type(this);
25092 int opc = this->ideal_Opcode();
25093 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25094 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
25095 %}
25096 ins_pipe( pipe_slow );
25097 %}
25098
25099 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
25100 match(Set dst (VectorRearrange (Binary dst src2) mask));
25101 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
25102 ins_encode %{
25103 int vlen_enc = vector_length_encoding(this);
25104 BasicType bt = Matcher::vector_element_basic_type(this);
25105 int opc = this->ideal_Opcode();
25106 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25107 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25108 %}
25109 ins_pipe( pipe_slow );
25110 %}
25111
25112 instruct vabs_masked(vec dst, kReg mask) %{
25113 match(Set dst (AbsVB dst mask));
25114 match(Set dst (AbsVS dst mask));
25115 match(Set dst (AbsVI dst mask));
25116 match(Set dst (AbsVL dst mask));
25117 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
25118 ins_encode %{
25119 int vlen_enc = vector_length_encoding(this);
25120 BasicType bt = Matcher::vector_element_basic_type(this);
25121 int opc = this->ideal_Opcode();
25122 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25123 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
25124 %}
25125 ins_pipe( pipe_slow );
25126 %}
25127
25128 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
25129 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
25130 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
25131 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25132 ins_encode %{
25133 assert(UseFMA, "Needs FMA instructions support.");
25134 int vlen_enc = vector_length_encoding(this);
25135 BasicType bt = Matcher::vector_element_basic_type(this);
25136 int opc = this->ideal_Opcode();
25137 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25138 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
25139 %}
25140 ins_pipe( pipe_slow );
25141 %}
25142
25143 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
25144 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
25145 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
25146 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
25147 ins_encode %{
25148 assert(UseFMA, "Needs FMA instructions support.");
25149 int vlen_enc = vector_length_encoding(this);
25150 BasicType bt = Matcher::vector_element_basic_type(this);
25151 int opc = this->ideal_Opcode();
25152 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
25153 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
25154 %}
25155 ins_pipe( pipe_slow );
25156 %}
25157
25158 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
25159 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
25160 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
25161 ins_encode %{
25162 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
25163 int vlen_enc = vector_length_encoding(this, $src1);
25164 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
25165
    // Dispatch the masked comparison on the element type of src1.
25167 switch (src1_elem_bt) {
25168 case T_BYTE: {
25169 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25170 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25171 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25172 break;
25173 }
25174 case T_SHORT: {
25175 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25176 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25177 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25178 break;
25179 }
25180 case T_INT: {
25181 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25182 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25183 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25184 break;
25185 }
25186 case T_LONG: {
25187 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
25188 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
25189 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
25190 break;
25191 }
25192 case T_FLOAT: {
25193 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25194 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25195 break;
25196 }
25197 case T_DOUBLE: {
25198 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
25199 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
25200 break;
25201 }
25202 default: assert(false, "%s", type2name(src1_elem_bt)); break;
25203 }
25204 %}
25205 ins_pipe( pipe_slow );
25206 %}
25207
25208 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
25209 predicate(Matcher::vector_length(n) <= 32);
25210 match(Set dst (MaskAll src));
25211 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
25212 ins_encode %{
25213 int mask_len = Matcher::vector_length(this);
25214 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
25215 %}
25216 ins_pipe( pipe_slow );
25217 %}
25218
25219 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
25220 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
25221 match(Set dst (XorVMask src (MaskAll cnt)));
25222 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
25223 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
25224 ins_encode %{
25225 uint masklen = Matcher::vector_length(this);
25226 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
25227 %}
25228 ins_pipe( pipe_slow );
25229 %}
25230
25231 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
25232 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
25233 (Matcher::vector_length(n) == 16) ||
25234 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
25235 match(Set dst (XorVMask src (MaskAll cnt)));
25236 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
25237 ins_encode %{
25238 uint masklen = Matcher::vector_length(this);
25239 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
25240 %}
25241 ins_pipe( pipe_slow );
25242 %}
25243
25244 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
25245 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
25246 match(Set dst (VectorLongToMask src));
25247 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
25248 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
25249 ins_encode %{
25250 int mask_len = Matcher::vector_length(this);
25251 int vec_enc = vector_length_encoding(mask_len);
25252 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25253 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
25254 %}
25255 ins_pipe( pipe_slow );
25256 %}
25257
25258
25259 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
25260 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
25261 match(Set dst (VectorLongToMask src));
25262 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
25264 ins_encode %{
25265 int mask_len = Matcher::vector_length(this);
25266 assert(mask_len <= 32, "invalid mask length");
25267 int vec_enc = vector_length_encoding(mask_len);
25268 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
25269 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
25270 %}
25271 ins_pipe( pipe_slow );
25272 %}
25273
25274 instruct long_to_mask_evex(kReg dst, rRegL src) %{
25275 predicate(n->bottom_type()->isa_vectmask());
25276 match(Set dst (VectorLongToMask src));
25277 format %{ "long_to_mask_evex $dst, $src\t!" %}
25278 ins_encode %{
25279 __ kmov($dst$$KRegister, $src$$Register);
25280 %}
25281 ins_pipe( pipe_slow );
25282 %}
25283
25284 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
25285 match(Set dst (AndVMask src1 src2));
25286 match(Set dst (OrVMask src1 src2));
25287 match(Set dst (XorVMask src1 src2));
25288 effect(TEMP kscratch);
25289 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
25290 ins_encode %{
25291 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
25292 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
25293 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
25294 uint masklen = Matcher::vector_length(this);
25295 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
25296 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
25297 %}
25298 ins_pipe( pipe_slow );
25299 %}
25300
25301 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
25302 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25303 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25304 ins_encode %{
25305 int vlen_enc = vector_length_encoding(this);
25306 BasicType bt = Matcher::vector_element_basic_type(this);
25307 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25308 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
25309 %}
25310 ins_pipe( pipe_slow );
25311 %}
25312
25313 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25314 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25315 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25316 ins_encode %{
25317 int vlen_enc = vector_length_encoding(this);
25318 BasicType bt = Matcher::vector_element_basic_type(this);
25319 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25320 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25321 %}
25322 ins_pipe( pipe_slow );
25323 %}
25324
25325 instruct castMM(kReg dst)
25326 %{
25327 match(Set dst (CastVV dst));
25328
25329 size(0);
25330 format %{ "# castVV of $dst" %}
25331 ins_encode(/* empty encoding */);
25332 ins_cost(0);
25333 ins_pipe(empty);
25334 %}
25335
25336 instruct castVV(vec dst)
25337 %{
25338 match(Set dst (CastVV dst));
25339
25340 size(0);
25341 format %{ "# castVV of $dst" %}
25342 ins_encode(/* empty encoding */);
25343 ins_cost(0);
25344 ins_pipe(empty);
25345 %}
25346
25347 instruct castVVLeg(legVec dst)
25348 %{
25349 match(Set dst (CastVV dst));
25350
25351 size(0);
25352 format %{ "# castVV of $dst" %}
25353 ins_encode(/* empty encoding */);
25354 ins_cost(0);
25355 ins_pipe(empty);
25356 %}
25357
25358 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25359 %{
25360 match(Set dst (IsInfiniteF src));
25361 effect(TEMP ktmp, KILL cr);
25362 format %{ "float_class_check $dst, $src" %}
25363 ins_encode %{
25364 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25365 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25366 %}
25367 ins_pipe(pipe_slow);
25368 %}
25369
25370 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25371 %{
25372 match(Set dst (IsInfiniteD src));
25373 effect(TEMP ktmp, KILL cr);
25374 format %{ "double_class_check $dst, $src" %}
25375 ins_encode %{
25376 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25377 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25378 %}
25379 ins_pipe(pipe_slow);
25380 %}
25381
25382 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25383 %{
25384 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25385 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25386 match(Set dst (SaturatingAddV src1 src2));
25387 match(Set dst (SaturatingSubV src1 src2));
25388 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25389 ins_encode %{
25390 int vlen_enc = vector_length_encoding(this);
25391 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25392 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25393 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25394 %}
25395 ins_pipe(pipe_slow);
25396 %}
25397
25398 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25399 %{
25400 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25401 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25402 match(Set dst (SaturatingAddV src1 src2));
25403 match(Set dst (SaturatingSubV src1 src2));
25404 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25405 ins_encode %{
25406 int vlen_enc = vector_length_encoding(this);
25407 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25408 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25409 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25410 %}
25411 ins_pipe(pipe_slow);
25412 %}
25413
25414 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25415 %{
25416 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25417 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25418 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25419 match(Set dst (SaturatingAddV src1 src2));
25420 match(Set dst (SaturatingSubV src1 src2));
25421 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25422 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25423 ins_encode %{
25424 int vlen_enc = vector_length_encoding(this);
25425 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25426 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25427 $src1$$XMMRegister, $src2$$XMMRegister,
25428 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25429 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25430 %}
25431 ins_pipe(pipe_slow);
25432 %}
25433
25434 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25435 %{
25436 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25437 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25438 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25439 match(Set dst (SaturatingAddV src1 src2));
25440 match(Set dst (SaturatingSubV src1 src2));
25441 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25442 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25443 ins_encode %{
25444 int vlen_enc = vector_length_encoding(this);
25445 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25446 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25447 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25448 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25449 %}
25450 ins_pipe(pipe_slow);
25451 %}
25452
25453 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25454 %{
25455 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25456 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25457 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25458 match(Set dst (SaturatingAddV src1 src2));
25459 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25460 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25461 ins_encode %{
25462 int vlen_enc = vector_length_encoding(this);
25463 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25464 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25465 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25466 %}
25467 ins_pipe(pipe_slow);
25468 %}
25469
25470 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25471 %{
25472 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25473 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25474 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25475 match(Set dst (SaturatingAddV src1 src2));
25476 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25477 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25478 ins_encode %{
25479 int vlen_enc = vector_length_encoding(this);
25480 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25481 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25482 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25483 %}
25484 ins_pipe(pipe_slow);
25485 %}
25486
25487 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25488 %{
25489 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25490 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25491 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25492 match(Set dst (SaturatingSubV src1 src2));
25493 effect(TEMP ktmp);
25494 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25495 ins_encode %{
25496 int vlen_enc = vector_length_encoding(this);
25497 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25498 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25499 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25500 %}
25501 ins_pipe(pipe_slow);
25502 %}
25503
25504 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25505 %{
25506 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25507 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25508 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25509 match(Set dst (SaturatingSubV src1 src2));
25510 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25511 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25512 ins_encode %{
25513 int vlen_enc = vector_length_encoding(this);
25514 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25515 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25516 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25517 %}
25518 ins_pipe(pipe_slow);
25519 %}
25520
25521 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25522 %{
25523 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25524 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25525 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25526 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25527 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25528 ins_encode %{
25529 int vlen_enc = vector_length_encoding(this);
25530 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25531 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25532 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25533 %}
25534 ins_pipe(pipe_slow);
25535 %}
25536
25537 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25538 %{
25539 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25540 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25541 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25542 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25543 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25544 ins_encode %{
25545 int vlen_enc = vector_length_encoding(this);
25546 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25547 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25548 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25549 %}
25550 ins_pipe(pipe_slow);
25551 %}
25552
25553 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25554 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25555 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25556 match(Set dst (SaturatingAddV (Binary dst src) mask));
25557 match(Set dst (SaturatingSubV (Binary dst src) mask));
25558 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25559 ins_encode %{
25560 int vlen_enc = vector_length_encoding(this);
25561 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25562 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25563 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25564 %}
25565 ins_pipe( pipe_slow );
25566 %}
25567
25568 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25569 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25570 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25571 match(Set dst (SaturatingAddV (Binary dst src) mask));
25572 match(Set dst (SaturatingSubV (Binary dst src) mask));
25573 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25574 ins_encode %{
25575 int vlen_enc = vector_length_encoding(this);
25576 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25577 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25578 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25579 %}
25580 ins_pipe( pipe_slow );
25581 %}
25582
25583 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25584 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25585 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25586 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25587 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25588 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25589 ins_encode %{
25590 int vlen_enc = vector_length_encoding(this);
25591 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25592 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25593 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25594 %}
25595 ins_pipe( pipe_slow );
25596 %}
25597
25598 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25599 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25600 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25601 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25602 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25603 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25604 ins_encode %{
25605 int vlen_enc = vector_length_encoding(this);
25606 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25607 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25608 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25609 %}
25610 ins_pipe( pipe_slow );
25611 %}
25612
25613 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25614 %{
25615 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25616 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25617 ins_encode %{
25618 int vlen_enc = vector_length_encoding(this);
25619 BasicType bt = Matcher::vector_element_basic_type(this);
25620 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25621 %}
25622 ins_pipe(pipe_slow);
25623 %}
25624
25625 instruct reinterpretS2HF(regF dst, rRegI src)
25626 %{
25627 match(Set dst (ReinterpretS2HF src));
25628 format %{ "evmovw $dst, $src" %}
25629 ins_encode %{
25630 __ evmovw($dst$$XMMRegister, $src$$Register);
25631 %}
25632 ins_pipe(pipe_slow);
25633 %}
25634
25635 instruct reinterpretHF2S(rRegI dst, regF src)
25636 %{
25637 match(Set dst (ReinterpretHF2S src));
25638 format %{ "evmovw $dst, $src" %}
25639 ins_encode %{
25640 __ evmovw($dst$$Register, $src$$XMMRegister);
25641 __ narrow_subword_type($dst$$Register, T_SHORT);
25642 %}
25643 ins_pipe(pipe_slow);
25644 %}
25645
25646 instruct convF2HFAndS2HF(regF dst, regF src)
25647 %{
25648 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25649 format %{ "convF2HFAndS2HF $dst, $src" %}
25650 ins_encode %{
25651 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25652 %}
25653 ins_pipe(pipe_slow);
25654 %}
25655
25656 instruct convHF2SAndHF2F(regF dst, regF src)
25657 %{
25658 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25659 format %{ "convHF2SAndHF2F $dst, $src" %}
25660 ins_encode %{
25661 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25662 %}
25663 ins_pipe(pipe_slow);
25664 %}
25665
25666 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25667 %{
25668 match(Set dst (SqrtHF src));
25669 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25670 ins_encode %{
25671 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25672 %}
25673 ins_pipe(pipe_slow);
25674 %}
25675
25676 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25677 %{
25678 match(Set dst (AddHF src1 src2));
25679 match(Set dst (DivHF src1 src2));
25680 match(Set dst (MulHF src1 src2));
25681 match(Set dst (SubHF src1 src2));
25682 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25683 ins_encode %{
25684 int opcode = this->ideal_Opcode();
25685 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25686 %}
25687 ins_pipe(pipe_slow);
25688 %}
25689
25690 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25691 %{
25692 predicate(VM_Version::supports_avx10_2());
25693 match(Set dst (MaxHF src1 src2));
25694 match(Set dst (MinHF src1 src2));
25695
25696 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25697 ins_encode %{
25698 int opcode = this->ideal_Opcode();
25699 __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25700 %}
25701 ins_pipe( pipe_slow );
25702 %}
25703
25704 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25705 %{
25706 predicate(!VM_Version::supports_avx10_2());
25707 match(Set dst (MaxHF src1 src2));
25708 match(Set dst (MinHF src1 src2));
25709 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25710
25711 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25712 ins_encode %{
25713 int opcode = this->ideal_Opcode();
25714 __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25715 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25716 %}
25717 ins_pipe( pipe_slow );
25718 %}
25719
25720 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25721 %{
25722 match(Set dst (FmaHF src2 (Binary dst src1)));
25723 effect(DEF dst);
25724 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25725 ins_encode %{
25726 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25727 %}
25728 ins_pipe( pipe_slow );
25729 %}
25730
25731
25732 instruct vector_sqrt_HF_reg(vec dst, vec src)
25733 %{
25734 match(Set dst (SqrtVHF src));
25735 format %{ "vector_sqrt_fp16 $dst, $src" %}
25736 ins_encode %{
25737 int vlen_enc = vector_length_encoding(this);
25738 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25739 %}
25740 ins_pipe(pipe_slow);
25741 %}
25742
25743 instruct vector_sqrt_HF_mem(vec dst, memory src)
25744 %{
25745 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25746 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25747 ins_encode %{
25748 int vlen_enc = vector_length_encoding(this);
25749 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25750 %}
25751 ins_pipe(pipe_slow);
25752 %}
25753
25754 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25755 %{
25756 match(Set dst (AddVHF src1 src2));
25757 match(Set dst (DivVHF src1 src2));
25758 match(Set dst (MulVHF src1 src2));
25759 match(Set dst (SubVHF src1 src2));
25760 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25761 ins_encode %{
25762 int vlen_enc = vector_length_encoding(this);
25763 int opcode = this->ideal_Opcode();
25764 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25765 %}
25766 ins_pipe(pipe_slow);
25767 %}
25768
25769
25770 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25771 %{
25772 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25773 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25774 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25775 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25776 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25777 ins_encode %{
25778 int vlen_enc = vector_length_encoding(this);
25779 int opcode = this->ideal_Opcode();
25780 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25781 %}
25782 ins_pipe(pipe_slow);
25783 %}
25784
25785 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25786 %{
25787 match(Set dst (FmaVHF src2 (Binary dst src1)));
25788 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25789 ins_encode %{
25790 int vlen_enc = vector_length_encoding(this);
25791 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25792 %}
25793 ins_pipe( pipe_slow );
25794 %}
25795
25796 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25797 %{
25798 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25799 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25800 ins_encode %{
25801 int vlen_enc = vector_length_encoding(this);
25802 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25803 %}
25804 ins_pipe( pipe_slow );
25805 %}
25806
25807 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25808 %{
25809 predicate(VM_Version::supports_avx10_2());
25810 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25811 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25812 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25813 ins_encode %{
25814 int vlen_enc = vector_length_encoding(this);
25815 int opcode = this->ideal_Opcode();
25816 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25817 k0, vlen_enc);
25818 %}
25819 ins_pipe( pipe_slow );
25820 %}
25821
25822 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25823 %{
25824 predicate(VM_Version::supports_avx10_2());
25825 match(Set dst (MinVHF src1 src2));
25826 match(Set dst (MaxVHF src1 src2));
25827 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25828 ins_encode %{
25829 int vlen_enc = vector_length_encoding(this);
25830 int opcode = this->ideal_Opcode();
25831 __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25832 k0, vlen_enc);
25833 %}
25834 ins_pipe( pipe_slow );
25835 %}
25836
25837 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25838 %{
25839 predicate(!VM_Version::supports_avx10_2());
25840 match(Set dst (MinVHF src1 src2));
25841 match(Set dst (MaxVHF src1 src2));
25842 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25843 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25844 ins_encode %{
25845 int vlen_enc = vector_length_encoding(this);
25846 int opcode = this->ideal_Opcode();
25847 __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25848 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25849 %}
25850 ins_pipe( pipe_slow );
25851 %}
25852
25853 //----------PEEPHOLE RULES-----------------------------------------------------
25854 // These must follow all instruction definitions as they use the names
25855 // defined in the instructions definitions.
25856 //
25857 // peeppredicate ( rule_predicate );
// // the predicate under which the peephole rule applies; if it evaluates
// // to false the rule is ignored
25859 //
25860 // peepmatch ( root_instr_name [preceding_instruction]* );
25861 //
25862 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file. The
// // method has the signature
// //   MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // where the arguments are the basic block, the index of the current node
// // inside the block, the register allocator, a function that when invoked
// // returns a new node as defined in peepreplace, and the rule numbers of
// // the nodes appearing in the corresponding peepmatch. The procedure
// // returns true if the transformation succeeded, else false.
25871 //
25872 // peepconstraint %{
25873 // (instruction_number.operand_name relational_op instruction_number.operand_name
25874 // [, ...] );
25875 // // instruction numbers are zero-based using left to right order in peepmatch
25876 //
25877 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25878 // // provide an instruction_number.operand_name for each operand that appears
25879 // // in the replacement instruction's match rule
25880 //
25881 // ---------VM FLAGS---------------------------------------------------------
25882 //
25883 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25884 //
25885 // Each peephole rule is given an identifying number starting with zero and
25886 // increasing by one in the order seen by the parser. An individual peephole
25887 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25888 // on the command-line.
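//
// For example (the rule number is chosen purely for illustration),
// -XX:OptoPeepholeAt=3 enables only the peephole rule numbered 3 and
// disables all other peephole rules.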
25889 //
25890 // ---------CURRENT LIMITATIONS----------------------------------------------
25891 //
// Only transformations inside a basic block (do we need more for peephole?)
25893 //
25894 // ---------EXAMPLE----------------------------------------------------------
25895 //
25896 // // pertinent parts of existing instructions in architecture description
25897 // instruct movI(rRegI dst, rRegI src)
25898 // %{
25899 // match(Set dst (CopyI src));
25900 // %}
25901 //
25902 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25903 // %{
25904 // match(Set dst (AddI dst src));
25905 // effect(KILL cr);
25906 // %}
25907 //
25908 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25909 // %{
25910 // match(Set dst (AddI dst src));
25911 // %}
25912 //
25913 // 1. Simple replacement
25914 // - Only match adjacent instructions in same basic block
25915 // - Only equality constraints
25916 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25917 // - Only one replacement instruction
25918 //
25919 // // Change (inc mov) to lea
25920 // peephole %{
25921 // // lea should only be emitted when beneficial
25922 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25923 // // increment preceded by register-register move
25924 // peepmatch ( incI_rReg movI );
25925 // // require that the destination register of the increment
25926 // // match the destination register of the move
25927 // peepconstraint ( 0.dst == 1.dst );
25928 // // construct a replacement instruction that sets
25929 // // the destination to ( move's source register + one )
25930 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25931 // %}
25932 //
25933 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25935 // - More flexible constraints
25936 // - More flexible transformations
25937 // - May utilise architecture-dependent API more effectively
25938 // - Currently only one replacement instruction due to adlc parsing capabilities
25939 //
25940 // // Change (inc mov) to lea
25941 // peephole %{
25942 // // lea should only be emitted when beneficial
25943 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the function below
25945 // peepmatch ( incI_rReg movI );
25946 // // the method that takes the responsibility of transformation
25947 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked,
// // creates this node is passed into the function above
25950 // peepreplace ( leaI_rReg_immI() );
25951 // %}
25952
// These instructions are not matched by the matcher but are used by the peephole rules below
25954 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25955 %{
25956 predicate(false);
25957 match(Set dst (AddI src1 src2));
25958 format %{ "leal $dst, [$src1 + $src2]" %}
25959 ins_encode %{
25960 Register dst = $dst$$Register;
25961 Register src1 = $src1$$Register;
25962 Register src2 = $src2$$Register;
25963 if (src1 != rbp && src1 != r13) {
25964 __ leal(dst, Address(src1, src2, Address::times_1));
25965 } else {
25966 assert(src2 != rbp && src2 != r13, "");
25967 __ leal(dst, Address(src2, src1, Address::times_1));
25968 }
25969 %}
25970 ins_pipe(ialu_reg_reg);
25971 %}
25972
25973 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25974 %{
25975 predicate(false);
25976 match(Set dst (AddI src1 src2));
25977 format %{ "leal $dst, [$src1 + $src2]" %}
25978 ins_encode %{
25979 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25980 %}
25981 ins_pipe(ialu_reg_reg);
25982 %}
25983
25984 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25985 %{
25986 predicate(false);
25987 match(Set dst (LShiftI src shift));
25988 format %{ "leal $dst, [$src << $shift]" %}
25989 ins_encode %{
25990 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25991 Register src = $src$$Register;
25992 if (scale == Address::times_2 && src != rbp && src != r13) {
25993 __ leal($dst$$Register, Address(src, src, Address::times_1));
25994 } else {
25995 __ leal($dst$$Register, Address(noreg, src, scale));
25996 }
25997 %}
25998 ins_pipe(ialu_reg_reg);
25999 %}
26000
26001 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
26002 %{
26003 predicate(false);
26004 match(Set dst (AddL src1 src2));
26005 format %{ "leaq $dst, [$src1 + $src2]" %}
26006 ins_encode %{
26007 Register dst = $dst$$Register;
26008 Register src1 = $src1$$Register;
26009 Register src2 = $src2$$Register;
26010 if (src1 != rbp && src1 != r13) {
26011 __ leaq(dst, Address(src1, src2, Address::times_1));
26012 } else {
26013 assert(src2 != rbp && src2 != r13, "");
26014 __ leaq(dst, Address(src2, src1, Address::times_1));
26015 }
26016 %}
26017 ins_pipe(ialu_reg_reg);
26018 %}
26019
26020 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
26021 %{
26022 predicate(false);
26023 match(Set dst (AddL src1 src2));
26024 format %{ "leaq $dst, [$src1 + $src2]" %}
26025 ins_encode %{
26026 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
26027 %}
26028 ins_pipe(ialu_reg_reg);
26029 %}
26030
26031 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
26032 %{
26033 predicate(false);
26034 match(Set dst (LShiftL src shift));
26035 format %{ "leaq $dst, [$src << $shift]" %}
26036 ins_encode %{
26037 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
26038 Register src = $src$$Register;
26039 if (scale == Address::times_2 && src != rbp && src != r13) {
26040 __ leaq($dst$$Register, Address(src, src, Address::times_1));
26041 } else {
26042 __ leaq($dst$$Register, Address(noreg, src, scale));
26043 }
26044 %}
26045 ins_pipe(ialu_reg_reg);
26046 %}
26047
26048 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26049 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26050 // processors with at least partial ALU support for lea
26051 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26052 // beneficial for processors with full ALU support
26053 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
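//
// As an illustrative sketch (register names chosen arbitrarily), a
// register-register move followed by an increment such as
//   movl rbx, rax
//   incl rbx
// may be coalesced by the rules below into a single
//   leal rbx, [rax + 1]
// provided the flags result of the increment is not observed downstream,
// since lea does not update the flags register.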
26054
26055 peephole
26056 %{
26057 peeppredicate(VM_Version::supports_fast_2op_lea());
26058 peepmatch (addI_rReg);
26059 peepprocedure (lea_coalesce_reg);
26060 peepreplace (leaI_rReg_rReg_peep());
26061 %}
26062
26063 peephole
26064 %{
26065 peeppredicate(VM_Version::supports_fast_2op_lea());
26066 peepmatch (addI_rReg_imm);
26067 peepprocedure (lea_coalesce_imm);
26068 peepreplace (leaI_rReg_immI_peep());
26069 %}
26070
26071 peephole
26072 %{
26073 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26074 VM_Version::is_intel_cascade_lake());
26075 peepmatch (incI_rReg);
26076 peepprocedure (lea_coalesce_imm);
26077 peepreplace (leaI_rReg_immI_peep());
26078 %}
26079
26080 peephole
26081 %{
26082 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26083 VM_Version::is_intel_cascade_lake());
26084 peepmatch (decI_rReg);
26085 peepprocedure (lea_coalesce_imm);
26086 peepreplace (leaI_rReg_immI_peep());
26087 %}
26088
26089 peephole
26090 %{
26091 peeppredicate(VM_Version::supports_fast_2op_lea());
26092 peepmatch (salI_rReg_immI2);
26093 peepprocedure (lea_coalesce_imm);
26094 peepreplace (leaI_rReg_immI2_peep());
26095 %}
26096
26097 peephole
26098 %{
26099 peeppredicate(VM_Version::supports_fast_2op_lea());
26100 peepmatch (addL_rReg);
26101 peepprocedure (lea_coalesce_reg);
26102 peepreplace (leaL_rReg_rReg_peep());
26103 %}
26104
26105 peephole
26106 %{
26107 peeppredicate(VM_Version::supports_fast_2op_lea());
26108 peepmatch (addL_rReg_imm);
26109 peepprocedure (lea_coalesce_imm);
26110 peepreplace (leaL_rReg_immL32_peep());
26111 %}
26112
26113 peephole
26114 %{
26115 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26116 VM_Version::is_intel_cascade_lake());
26117 peepmatch (incL_rReg);
26118 peepprocedure (lea_coalesce_imm);
26119 peepreplace (leaL_rReg_immL32_peep());
26120 %}
26121
26122 peephole
26123 %{
26124 peeppredicate(VM_Version::supports_fast_3op_lea() ||
26125 VM_Version::is_intel_cascade_lake());
26126 peepmatch (decL_rReg);
26127 peepprocedure (lea_coalesce_imm);
26128 peepreplace (leaL_rReg_immL32_peep());
26129 %}
26130
26131 peephole
26132 %{
26133 peeppredicate(VM_Version::supports_fast_2op_lea());
26134 peepmatch (salL_rReg_immI2);
26135 peepprocedure (lea_coalesce_imm);
26136 peepreplace (leaL_rReg_immI2_peep());
26137 %}
26138
26139 peephole
26140 %{
26141 peepmatch (leaPCompressedOopOffset);
26142 peepprocedure (lea_remove_redundant);
26143 %}
26144
26145 peephole
26146 %{
26147 peepmatch (leaP8Narrow);
26148 peepprocedure (lea_remove_redundant);
26149 %}
26150
26151 peephole
26152 %{
26153 peepmatch (leaP32Narrow);
26154 peepprocedure (lea_remove_redundant);
26155 %}
26156
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
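//
// As an illustrative sketch (register names chosen arbitrarily), in a
// sequence such as
//   andl  rax, rbx
//   testl rax, rax
//   je    done
// the testl is redundant: andl already sets ZF/SF from the same result, so
// the rules below may remove the test when this is proven safe.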
26159
// int variant
26161 peephole
26162 %{
26163 peepmatch (testI_reg);
26164 peepprocedure (test_may_remove);
26165 %}
26166
// long variant
26168 peephole
26169 %{
26170 peepmatch (testL_reg);
26171 peepprocedure (test_may_remove);
26172 %}
26173
26174
26175 //----------SMARTSPILL RULES---------------------------------------------------
26176 // These must follow all instruction definitions as they use the names
26177 // defined in the instructions definitions.