1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, VMReg );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
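66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
// NOTE(review): the reg_defs below declare RSI and RDI as SOC in the
// register save type column (SOE appears only in the C convention column
// under _WIN64), so the last sentence above looks stale -- confirm.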
69
// General registers RAX, RCX, RDX, RBX, RSP and RBP (encodings 0-5).
// Every register is declared as a pair: the base name bound to the
// register's VMReg, and an "_H" companion bound to the following VMReg
// slot (->next()). Both entries of a pair carry the same save types and
// the same encoding.
// Argument order: register save type, C convention save type, ideal
// register type, encoding, VMReg.
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
// RBX: SOC as register save type, but SOE under the C convention.
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
// RSP: no-save in both columns.
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
// Hence NS as register save type; the C convention column is still SOE.
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
// RSI and RDI (encodings 6 and 7): only the C convention save type depends
// on the platform. With _WIN64 defined it is SOE; otherwise it is SOC.
// The register save type (first column) is SOC in both branches.
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
// R8-R15 (encodings 8-15), each declared as a base/"_H" pair.
// R8-R11 are SOC in both columns; R12-R15 are SOC as register save type
// and SOE under the C convention.
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
// R16-R31 (encodings 16-31), each declared as a base/"_H" pair and all
// SOC in both the register save type and C convention columns.
// NOTE(review): the REX remark at the top of the general register section
// covers only R8-R15; confirm which prefix these higher encodings require.
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// chunk0: selection order for the general registers, highest priority
// first. Each register is immediately followed by its "_H" companion, so
// every pair starts on an even position in the list. R10 and R11 lead the
// list; RSP is placed last.
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers, or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// XMM0-XMM7 (encodings 0-7). Each register is described by 16 reg_defs:
// the base name bound to the register's VMReg, followed by suffixes b-p
// bound to ->next(1) .. ->next(15). All 16 entries of a register share its
// encoding and are declared (SOC, SOC, Op_RegF).
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
// NOTE(review): the XMM section header lists XMM6-XMM15 as preserved by the
// Windows ABI, yet XMM6 and XMM7 are SOC in the C convention column here --
// confirm that preservation is handled elsewhere.
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
// XMM8-XMM15 (encodings 8-15). Same layout as the lower registers: a base
// reg_def bound to the register's VMReg plus suffixes b-p bound to
// ->next(1) .. ->next(15), all declared (SOC, SOC, Op_RegF) with the
// register's encoding.
// NOTE(review): the XMM section header lists XMM6-XMM15 as preserved by the
// Windows ABI, yet these are SOC in the C convention column -- confirm that
// preservation is handled elsewhere.
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
// XMM20: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 20.
577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 21.
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 22.
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 23.
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 24.
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 25.
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 26.
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 27.
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 28.
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 29.
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 30.
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31: base slot plus 15 further VMReg slots (b..p = next(1)..next(15)).
// Every slot is SOC for both the register save type and the C convention
// save type, has ideal register type Op_RegF, and uses encoding 31.
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: SOC for both save types, ideal register type 0, encoding 16,
// and VMRegImpl::Bad() passed as its VMReg argument.
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
// Each Kn (n = 1..7) is defined as two Op_RegI slots sharing encoding n:
// Kn at kn->as_VMReg() and Kn_H at the following VMReg (->next()).
// No K0 definition appears in this group.
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
813 // Empty register class (no members are listed).
814 reg_class no_reg();
815
816 // Class for all pointer/long registers including APX extended GPRs.
// Every register is listed as a (Rn, Rn_H) slot pair; RSP and R15 are
// both members of this class.
817 reg_class all_reg(RAX, RAX_H,
818 RDX, RDX_H,
819 RBP, RBP_H,
820 RDI, RDI_H,
821 RSI, RSI_H,
822 RCX, RCX_H,
823 RBX, RBX_H,
824 RSP, RSP_H,
825 R8, R8_H,
826 R9, R9_H,
827 R10, R10_H,
828 R11, R11_H,
829 R12, R12_H,
830 R13, R13_H,
831 R14, R14_H,
832 R15, R15_H,
833 R16, R16_H,
834 R17, R17_H,
835 R18, R18_H,
836 R19, R19_H,
837 R20, R20_H,
838 R21, R21_H,
839 R22, R22_H,
840 R23, R23_H,
841 R24, R24_H,
842 R25, R25_H,
843 R26, R26_H,
844 R27, R27_H,
845 R28, R28_H,
846 R29, R29_H,
847 R30, R30_H,
848 R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
// Only the base (non-_H) slot of each register is listed. Compared with
// all_reg, RSP and R15 do not appear in this list.
851 reg_class all_int_reg(RAX,
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
// The pointer classes below use a %{ ... %} body that returns a _*_mask
// value instead of a literal register list; those masks are not defined
// in this part of the file.
882 // Class for all pointer registers
883 reg_class any_reg %{
884 return _ANY_REG_mask;
885 %}
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
// The long classes below use a %{ ... %} body that returns a _LONG_*_mask
// value instead of a literal register list; those masks are not defined
// in this part of the file.
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
// The int classes below use a %{ ... %} body that returns an _INT_*_mask
// value instead of a literal register list; those masks are not defined
// in this part of the file.
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
// Pointer singletons: each class holds exactly one register, given as its
// (Rn, Rn_H) slot pair.
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
// Long singletons: each class holds exactly one register, given as its
// (Rn, Rn_H) slot pair.
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
// Int singletons: each class holds only the base slot of one register
// (no _H slot).
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation class chunk1: all 16 slots (base, b..p) of XMM0 through XMM31,
// listed register by register in ascending order.
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation class chunk2: the mask register slot pairs, listed in
// descending order from K7 down to K1.
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
// Class for all mask registers K1..K7, each given as its (Kn, Kn_H) slot pair.
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
// Singleton classes, one per mask register (both slots of that register).
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
// chunk3 contains only RFLAGS.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes (contains only RFLAGS)
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
// One slot (the base name) from each of XMM0..XMM15.
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
// One slot (the base name) from each of XMM0..XMM31.
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
// Dynamic float classes built from float_reg_evex and float_reg_legacy plus a
// VM_Version predicate. NOTE(review): presumably the first class is used when
// the predicate is true and the second otherwise; confirm against ADLC.
// float_reg tests supports_evex(); float_reg_vl additionally requires
// supports_avx512vl().
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
// Two slots (base and b) from each of XMM0..XMM15.
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
// Two slots (base and b) from each of XMM0..XMM31.
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
// Dynamic double classes built from double_reg_evex and double_reg_legacy plus
// a VM_Version predicate. NOTE(review): presumably the first class is used
// when the predicate is true and the second otherwise; confirm against ADLC.
// double_reg tests supports_evex(); double_reg_vl additionally requires
// supports_avx512vl().
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
// One slot (the base name) from each of XMM0..XMM15.
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
// One slot (the base name) from each of XMM0..XMM31.
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
// Dynamic 32bit vector classes built from vectors_reg_evex and
// vectors_reg_legacy plus a VM_Version predicate. NOTE(review): presumably the
// first class is used when the predicate is true and the second otherwise;
// confirm against ADLC.
// vectors_reg tests supports_evex(); vectors_reg_vlbwdq tests
// supports_avx512vlbwdq().
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
1226 // Class for pre evex 64bit vector registers
// Two slots (base and b) from each of XMM0..XMM15.
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
1244 // Class for evex 64bit vector registers
// Two slots (base and b) from each of XMM0..XMM31.
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
// Dynamic 64bit vector classes built from vectord_reg_evex and
// vectord_reg_legacy plus a VM_Version predicate. NOTE(review): presumably the
// first class is used when the predicate is true and the second otherwise;
// confirm against ADLC.
// vectord_reg tests supports_evex(); vectord_reg_vlbwdq tests
// supports_avx512vlbwdq().
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
1281 // Class for pre evex 128bit vector registers
// Four slots (base, b, c, d) from each of XMM0..XMM15.
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
1299 // Class for evex 128bit vector registers
// Four slots (base, b, c, d) from each of XMM0..XMM31.
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
// Dynamic 128bit vector classes built from vectorx_reg_evex and
// vectorx_reg_legacy plus a VM_Version predicate. NOTE(review): presumably the
// first class is used when the predicate is true and the second otherwise;
// confirm against ADLC.
// vectorx_reg tests supports_evex(); vectorx_reg_vlbwdq tests
// supports_avx512vlbwdq().
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for pre evex 256bit vector registers
// Eight slots (base, b..h) from each of XMM0..XMM15.
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
1354 // Class for evex 256bit vector registers
// Eight slots (base, b..h) from each of XMM0..XMM31.
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// Dynamic 256bit vector register classes. Each one resolves to the first
// listed class (the evex set) when its predicate is true and to the second
// listed class (the legacy set) otherwise.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// Dynamic 512bit vector register classes. Each one resolves to the first
// listed class (the evex set) when its predicate is true and to the second
// listed class (the legacy set) otherwise. The _vl variant additionally
// requires AVX512VL support before the evex set is chosen.
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
// Class holding only XMM0 (slots XMM0 through XMM0d).
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
// Returns true when each bound of the CastLL node's long type is either
// unbounded (min_jlong for the low bound, max_jlong for the high bound) or
// fits a signed 32-bit immediate. The definition is in the source block below.
bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
// Register masks that are filled in by reg_mask_init() (see the source block
// that follows) instead of being generated directly by adlc.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// The pointer/long/int masks above, each or'ed with
// STACK_OR_STACK_SLOTS_mask() by reg_mask_init().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
// Short aliases for Assembler::imm_operand and Assembler::disp32_operand.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Allows assembler calls to be written as "__ insn(...)"; the macro expands to
// a member call on whatever variable or parameter named 'masm' is in scope.
#define __ masm->
1506
// Storage for the register masks declared extern in the preceding source_hpp
// block. They are default constructed here and populated by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
1524
// Returns the value of UseCompressedOops. reg_mask_init() removes r12 from the
// allocatable masks when this is true.
// NOTE(review): presumably because r12 then holds the compressed oops heap
// base - confirm.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662 //
1663 // Compute padding required for nodes which need alignment
1664 //
1665
1666 // The address of the call instruction needs to be 4-byte aligned to
1667 // ensure that it does not span a cache line so that it can be patched.
1668 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1669 {
1670 current_offset += clear_avx_size(); // skip vzeroupper
1671 current_offset += 1; // skip call opcode byte
1672 return align_up(current_offset, alignment_required()) - current_offset;
1673 }
1674
1675 // The address of the call instruction needs to be 4-byte aligned to
1676 // ensure that it does not span a cache line so that it can be patched.
1677 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1678 {
1679 current_offset += clear_avx_size(); // skip vzeroupper
1680 current_offset += 11; // skip movq instruction + call opcode byte
1681 return align_up(current_offset, alignment_required()) - current_offset;
1682 }
1683
// This could be in MacroAssembler but it's fairly C2 specific
//
// Emits a flags fixup that is meant to follow a comiss/ucomiss style compare.
// When the parity flag is clear the emitted code jumps over the fixup.
// Otherwise the flags are pushed, masked in place on the stack and popped
// again, so that an unordered compare reads as 'less than'.
static void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit); // PF clear: nothing to fix up
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andq(Address(rsp, 0), 0xffffff2b); // mask the flags image that pushf left at [rsp]
  __ popf();
  __ bind(exit);
}
1704
// Emits the materialization of a three-way floating point compare result in
// dst, based on the flags of a preceding compare. dst is first loaded with -1;
// when CF is set (below) that value is kept, otherwise setcc(notEqual, dst)
// decides the value.
// NOTE(review): presumably setcc leaves 1 in dst for 'not equal' and 0 for
// 'equal' - confirm against MacroAssembler::setcc.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // If any floating point comparison instruction is used, unordered case always triggers jump
  // for below condition, CF=1 is true when at least one input is NaN
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::below, done);
  __ setcc(Assembler::notEqual, dst);
  __ bind(done);
}
1714
// Floating point precision selector used by emit_fp_ucom(), movfp() and
// emit_fp_min_max() to pick the half, single or double precision variant of
// an instruction.
enum FP_PREC {
  fp_prec_hlf,  // selects evucomish / movhlf / the Float16.NaN constant
  fp_prec_flt,  // selects ucomiss / movflt / the Float.NaN constant
  fp_prec_dbl   // selects ucomisd / movdbl / the Double.NaN constant
};
1720
1721 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1722 XMMRegister p, XMMRegister q) {
1723 if (pt == fp_prec_hlf) {
1724 __ evucomish(p, q);
1725 } else if (pt == fp_prec_flt) {
1726 __ ucomiss(p, q);
1727 } else {
1728 __ ucomisd(p, q);
1729 }
1730 }
1731
1732 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1733 XMMRegister dst, XMMRegister src, Register scratch) {
1734 if (pt == fp_prec_hlf) {
1735 __ movhlf(dst, src, scratch);
1736 } else if (pt == fp_prec_flt) {
1737 __ movflt(dst, src);
1738 } else {
1739 __ movdbl(dst, src);
1740 }
1741 }
1742
// Emits code that stores min(a, b) or max(a, b) into dst, for the precision
// given by pt. The result selection by compare outcome is:
//
//                      Math.min()    # Math.max()
//                      -----------------------------
//  (v)ucomis[h/s/d]                  #
//  ja   ->             b             # a
//  jp   ->             NaN           # NaN
//  jb   ->             a             # b
//  je   ->             a | b         # a & b
//
// Parameters:
//   dst  - receives the result
//   a, b - the two inputs
//   rt   - general purpose scratch register; used to load the NaN constant and
//          passed on to movfp()
//   min  - true to compute the minimum, false to compute the maximum
//   pt   - precision of the operands
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b, Register rt,
                            bool min, enum FP_PREC pt) {
  // NOTE(review): 'zero' is declared but neither bound nor jumped to in this
  // function.
  Label nan, zero, below, above, done;

  emit_fp_ucom(masm, pt, a, b);

  // For the 'above' outcome the result is (min ? b : a). When dst already is
  // that register no move is needed and the code can branch straight to done.
  if (dst->encoding() != (min ? b : a)->encoding()) {
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  } else {
    __ jccb(Assembler::above, done);
  }
  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  // Using bitwise operations is a low cost way to compute the correct result
  // for zero and non-zero inputs in this scenario except for NaN, which is
  // handled separately. The mantissa and exponent are valid with either
  // bitwise operation. For zero inputs, the sign bit is chosen according to
  // whether a minimum or maximum value is required.
  if (min) {
    // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  } else {
    // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
    __ vpand(dst, a, b, Assembler::AVX_128bit);
  }
  __ jmp(done);

  // above: the result is b for min and a for max
  __ bind(above);
  movfp(masm, pt, dst, min ? b : a, rt);
  __ jmp(done);

  // unordered: load the NaN bit pattern of the matching precision through rt
  __ bind(nan);
  if (pt == fp_prec_hlf) {
    __ movl(rt, 0x00007e00); // Float16.NaN
    __ evmovw(dst, rt);
  } else if (pt == fp_prec_flt) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  } else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  // below: the result is a for min and b for max; falls through to done
  __ bind(below);
  movfp(masm, pt, dst, min ? a : b, rt);

  __ bind(done);
}
1802
1803 //=============================================================================
// The constant base node has an empty output register mask on this platform.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;

// The constant table is addressed absolutely, so its base offset is always 0.
int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

// No post-allocation expansion is requested; postalloc_expand() treats any
// call as an error.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// Emits nothing.
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// Reports a size of 0, matching the empty encoding above.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// Prints a placeholder line for the node (non-PRODUCT builds only).
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
1828
1829
1830 //=============================================================================
1831 #ifndef PRODUCT
1832 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1833 Compile* C = ra_->C;
1834
1835 int framesize = C->output()->frame_size_in_bytes();
1836 int bangsize = C->output()->bang_size_in_bytes();
1837 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1838 // Remove wordSize for return addr which is already pushed.
1839 framesize -= wordSize;
1840
1841 if (C->output()->need_stack_bang(bangsize)) {
1842 framesize -= wordSize;
1843 st->print("# stack bang (%d bytes)", bangsize);
1844 st->print("\n\t");
1845 st->print("pushq rbp\t# Save rbp");
1846 if (PreserveFramePointer) {
1847 st->print("\n\t");
1848 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1849 }
1850 if (framesize) {
1851 st->print("\n\t");
1852 st->print("subq rsp, #%d\t# Create frame",framesize);
1853 }
1854 } else {
1855 st->print("subq rsp, #%d\t# Create frame",framesize);
1856 st->print("\n\t");
1857 framesize -= wordSize;
1858 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1859 if (PreserveFramePointer) {
1860 st->print("\n\t");
1861 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1862 if (framesize > 0) {
1863 st->print("\n\t");
1864 st->print("addq rbp, #%d", framesize);
1865 }
1866 }
1867 }
1868
1869 if (VerifyStackAtCalls) {
1870 st->print("\n\t");
1871 framesize -= wordSize;
1872 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1873 #ifdef ASSERT
1874 st->print("\n\t");
1875 st->print("# stack alignment check");
1876 #endif
1877 }
1878 if (C->stub_function() != nullptr) {
1879 st->print("\n\t");
1880 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1881 st->print("\n\t");
1882 st->print("je fast_entry\t");
1883 st->print("\n\t");
1884 st->print("call #nmethod_entry_barrier_stub\t");
1885 st->print("\n\tfast_entry:");
1886 }
1887 st->cr();
1888 }
1889 #endif
1890
// Emits the method prolog: the verified entry sequence, the entry barrier for
// non-stub compilations, and the binding of the _verified_entry label. It also
// records the frame-complete offset and, when the compilation has a mach
// constant base node, sets the constant table base offset.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  __ verified_entry(C);

  // The entry barrier is emitted only when this compilation is not a stub.
  if (ra_->C->stub_function() == nullptr) {
    __ entry_barrier();
  }

  // NOTE(review): presumably the label must not be bound while the code is
  // only emitted into a scratch buffer for size measurement - confirm.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  // The frame is considered complete at the current code offset.
  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1913
1914
// Returns 0 for the prolog.
// NOTE(review): presumably this is the number of relocation entries to
// reserve for the node. The previous remark, "a large enough number", does not
// fit a value of 0 - confirm against the callers of reloc().
int MachPrologNode::reloc() const
{
  return 0;
}
1919
1920 //=============================================================================
1921 #ifndef PRODUCT
1922 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1923 {
1924 Compile* C = ra_->C;
1925 if (generate_vzeroupper(C)) {
1926 st->print("vzeroupper");
1927 st->cr(); st->print("\t");
1928 }
1929
1930 int framesize = C->output()->frame_size_in_bytes();
1931 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1932 // Remove word for return adr already pushed
1933 // and RBP
1934 framesize -= 2*wordSize;
1935
1936 if (framesize) {
1937 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1938 st->print("\t");
1939 }
1940
1941 st->print_cr("popq rbp");
1942 if (do_polling() && C->is_method_compilation()) {
1943 st->print("\t");
1944 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1945 "ja #safepoint_stub\t"
1946 "# Safepoint: poll for GC");
1947 }
1948 }
1949 #endif
1950
// Emits the method epilog: the optional vzeroupper, the removal of the frame,
// the reserved stack check when it applies and, for method compilations that
// poll, the return safepoint poll together with its out-of-line stub.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // A throwaway label is the poll target unless a real stub gets created
    // below; no stub is created while in_scratch_emit_size() is true.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // The stub is allocated in the compilation arena, created with the
      // current code offset and registered with the output phase.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1981
// Returns 2 for the epilog.
// NOTE(review): presumably an upper bound on the relocation entries the
// epilog may need - confirm against the callers of reloc().
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
1986
// The epilog has no pipeline description of its own; it returns the one from
// MachNode::pipeline_class().
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1991
1992 //=============================================================================
1993
// Register classes distinguished by rc_class() when spill copies are
// implemented.
enum RC {
  rc_bad,    // the OptoReg is not valid
  rc_int,    // VMReg for which is_Register() holds
  rc_kreg,   // VMReg for which is_KRegister() holds
  rc_float,  // VMReg for which is_XMMRegister() holds
  rc_stack   // the OptoReg is a stack slot
};
2001
2002 static enum RC rc_class(OptoReg::Name reg)
2003 {
2004 if( !OptoReg::is_valid(reg) ) return rc_bad;
2005
2006 if (OptoReg::is_stack(reg)) return rc_stack;
2007
2008 VMReg r = OptoReg::as_VMReg(reg);
2009
2010 if (r->is_Register()) return rc_int;
2011
2012 if (r->is_KRegister()) return rc_kreg;
2013
2014 assert(r->is_XMMRegister(), "must be");
2015 return rc_float;
2016 }
2017
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Only the declarations appear here. MachSpillCopyNode::implementation() uses
// vec_mov_helper for vector register to register copies and vec_spill_helper
// for copies between a vector register and a stack slot (is_load is true for
// stack to register).
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
2024
// Copies a vector value from the stack slot at [rsp + src_offset] to the stack
// slot at [rsp + dst_offset]. ireg selects the vector kind (Op_VecS, Op_VecD,
// Op_VecX, Op_VecY or Op_VecZ); any other value is an error.
//
// When masm is non-null the copy is emitted as code. Otherwise, in
// non-PRODUCT builds, a textual form of the same sequence is printed to st.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is saved at [rsp - 8] and restored after.
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy with a memory push followed by a memory pop.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0; xmm0 is saved at [rsp - 32] and restored
      // after.
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0; xmm0 is saved at [rsp - 64] and restored
      // after.
      // NOTE(review): the trailing 2 is presumably the 512-bit vector length
      // encoding - confirm.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Listing only: print the sequence that the branch above would emit.
    switch (ireg) {
    case Op_VecS:
      st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl rax, [rsp + #%d]\n\t"
                "movl [rsp + #%d], rax\n\t"
                "movq rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecX:
      st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushq [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the listing prints vmovdqu although the emitting branch
      // uses evmovdquq for this case.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
2102
2103 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2104 PhaseRegAlloc* ra_,
2105 bool do_size,
2106 outputStream* st) const {
2107 assert(masm != nullptr || st != nullptr, "sanity");
2108 // Get registers to move
2109 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2110 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2111 OptoReg::Name dst_second = ra_->get_reg_second(this);
2112 OptoReg::Name dst_first = ra_->get_reg_first(this);
2113
2114 enum RC src_second_rc = rc_class(src_second);
2115 enum RC src_first_rc = rc_class(src_first);
2116 enum RC dst_second_rc = rc_class(dst_second);
2117 enum RC dst_first_rc = rc_class(dst_first);
2118
2119 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2120 "must move at least 1 register" );
2121
2122 if (src_first == dst_first && src_second == dst_second) {
2123 // Self copy, no move
2124 return 0;
2125 }
2126 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
2127 uint ireg = ideal_reg();
2128 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2129 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2130 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2131 // mem -> mem
2132 int src_offset = ra_->reg2offset(src_first);
2133 int dst_offset = ra_->reg2offset(dst_first);
2134 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2135 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2136 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2137 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2138 int stack_offset = ra_->reg2offset(dst_first);
2139 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2140 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2141 int stack_offset = ra_->reg2offset(src_first);
2142 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2143 } else {
2144 ShouldNotReachHere();
2145 }
2146 return 0;
2147 }
2148 if (src_first_rc == rc_stack) {
2149 // mem ->
2150 if (dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 assert(src_second != dst_first, "overlap");
2153 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2154 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2155 // 64-bit
2156 int src_offset = ra_->reg2offset(src_first);
2157 int dst_offset = ra_->reg2offset(dst_first);
2158 if (masm) {
2159 __ pushq(Address(rsp, src_offset));
2160 __ popq (Address(rsp, dst_offset));
2161 #ifndef PRODUCT
2162 } else {
2163 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2164 "popq [rsp + #%d]",
2165 src_offset, dst_offset);
2166 #endif
2167 }
2168 } else {
2169 // 32-bit
2170 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2171 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2172 // No pushl/popl, so:
2173 int src_offset = ra_->reg2offset(src_first);
2174 int dst_offset = ra_->reg2offset(dst_first);
2175 if (masm) {
2176 __ movq(Address(rsp, -8), rax);
2177 __ movl(rax, Address(rsp, src_offset));
2178 __ movl(Address(rsp, dst_offset), rax);
2179 __ movq(rax, Address(rsp, -8));
2180 #ifndef PRODUCT
2181 } else {
2182 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2183 "movl rax, [rsp + #%d]\n\t"
2184 "movl [rsp + #%d], rax\n\t"
2185 "movq rax, [rsp - #8]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 }
2190 return 0;
2191 } else if (dst_first_rc == rc_int) {
2192 // mem -> gpr
2193 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2194 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2195 // 64-bit
2196 int offset = ra_->reg2offset(src_first);
2197 if (masm) {
2198 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2199 #ifndef PRODUCT
2200 } else {
2201 st->print("movq %s, [rsp + #%d]\t# spill",
2202 Matcher::regName[dst_first],
2203 offset);
2204 #endif
2205 }
2206 } else {
2207 // 32-bit
2208 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2209 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2210 int offset = ra_->reg2offset(src_first);
2211 if (masm) {
2212 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2213 #ifndef PRODUCT
2214 } else {
2215 st->print("movl %s, [rsp + #%d]\t# spill",
2216 Matcher::regName[dst_first],
2217 offset);
2218 #endif
2219 }
2220 }
2221 return 0;
2222 } else if (dst_first_rc == rc_float) {
2223 // mem-> xmm
2224 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2225 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2226 // 64-bit
2227 int offset = ra_->reg2offset(src_first);
2228 if (masm) {
2229 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2230 #ifndef PRODUCT
2231 } else {
2232 st->print("%s %s, [rsp + #%d]\t# spill",
2233 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2234 Matcher::regName[dst_first],
2235 offset);
2236 #endif
2237 }
2238 } else {
2239 // 32-bit
2240 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2241 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2242 int offset = ra_->reg2offset(src_first);
2243 if (masm) {
2244 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2245 #ifndef PRODUCT
2246 } else {
2247 st->print("movss %s, [rsp + #%d]\t# spill",
2248 Matcher::regName[dst_first],
2249 offset);
2250 #endif
2251 }
2252 }
2253 return 0;
2254 } else if (dst_first_rc == rc_kreg) {
2255 // mem -> kreg
2256 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2257 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2258 // 64-bit
2259 int offset = ra_->reg2offset(src_first);
2260 if (masm) {
2261 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2262 #ifndef PRODUCT
2263 } else {
2264 st->print("kmovq %s, [rsp + #%d]\t# spill",
2265 Matcher::regName[dst_first],
2266 offset);
2267 #endif
2268 }
2269 }
2270 return 0;
2271 }
2272 } else if (src_first_rc == rc_int) {
2273 // gpr ->
2274 if (dst_first_rc == rc_stack) {
2275 // gpr -> mem
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(dst_first);
2280 if (masm) {
2281 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("movq [rsp + #%d], %s\t# spill",
2285 offset,
2286 Matcher::regName[src_first]);
2287 #endif
2288 }
2289 } else {
2290 // 32-bit
2291 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2292 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2293 int offset = ra_->reg2offset(dst_first);
2294 if (masm) {
2295 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2296 #ifndef PRODUCT
2297 } else {
2298 st->print("movl [rsp + #%d], %s\t# spill",
2299 offset,
2300 Matcher::regName[src_first]);
2301 #endif
2302 }
2303 }
2304 return 0;
2305 } else if (dst_first_rc == rc_int) {
2306 // gpr -> gpr
2307 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2308 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2309 // 64-bit
2310 if (masm) {
2311 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2312 as_Register(Matcher::_regEncode[src_first]));
2313 #ifndef PRODUCT
2314 } else {
2315 st->print("movq %s, %s\t# spill",
2316 Matcher::regName[dst_first],
2317 Matcher::regName[src_first]);
2318 #endif
2319 }
2320 return 0;
2321 } else {
2322 // 32-bit
2323 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2324 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2325 if (masm) {
2326 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2327 as_Register(Matcher::_regEncode[src_first]));
2328 #ifndef PRODUCT
2329 } else {
2330 st->print("movl %s, %s\t# spill",
2331 Matcher::regName[dst_first],
2332 Matcher::regName[src_first]);
2333 #endif
2334 }
2335 return 0;
2336 }
2337 } else if (dst_first_rc == rc_float) {
2338 // gpr -> xmm
2339 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2340 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2341 // 64-bit
2342 if (masm) {
2343 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2344 #ifndef PRODUCT
2345 } else {
2346 st->print("movdq %s, %s\t# spill",
2347 Matcher::regName[dst_first],
2348 Matcher::regName[src_first]);
2349 #endif
2350 }
2351 } else {
2352 // 32-bit
2353 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2354 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2355 if (masm) {
2356 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2357 #ifndef PRODUCT
2358 } else {
2359 st->print("movdl %s, %s\t# spill",
2360 Matcher::regName[dst_first],
2361 Matcher::regName[src_first]);
2362 #endif
2363 }
2364 }
2365 return 0;
2366 } else if (dst_first_rc == rc_kreg) {
2367 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2368 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2369 // 64-bit
2370 if (masm) {
2371 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2372 #ifndef PRODUCT
2373 } else {
2374 st->print("kmovq %s, %s\t# spill",
2375 Matcher::regName[dst_first],
2376 Matcher::regName[src_first]);
2377 #endif
2378 }
2379 }
2380 Unimplemented();
2381 return 0;
2382 }
2383 } else if (src_first_rc == rc_float) {
2384 // xmm ->
2385 if (dst_first_rc == rc_stack) {
2386 // xmm -> mem
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 int offset = ra_->reg2offset(dst_first);
2391 if (masm) {
2392 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("movsd [rsp + #%d], %s\t# spill",
2396 offset,
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 } else {
2401 // 32-bit
2402 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2403 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2404 int offset = ra_->reg2offset(dst_first);
2405 if (masm) {
2406 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2407 #ifndef PRODUCT
2408 } else {
2409 st->print("movss [rsp + #%d], %s\t# spill",
2410 offset,
2411 Matcher::regName[src_first]);
2412 #endif
2413 }
2414 }
2415 return 0;
2416 } else if (dst_first_rc == rc_int) {
2417 // xmm -> gpr
2418 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2419 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2420 // 64-bit
2421 if (masm) {
2422 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2423 #ifndef PRODUCT
2424 } else {
2425 st->print("movdq %s, %s\t# spill",
2426 Matcher::regName[dst_first],
2427 Matcher::regName[src_first]);
2428 #endif
2429 }
2430 } else {
2431 // 32-bit
2432 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2433 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2434 if (masm) {
2435 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2436 #ifndef PRODUCT
2437 } else {
2438 st->print("movdl %s, %s\t# spill",
2439 Matcher::regName[dst_first],
2440 Matcher::regName[src_first]);
2441 #endif
2442 }
2443 }
2444 return 0;
2445 } else if (dst_first_rc == rc_float) {
2446 // xmm -> xmm
2447 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2448 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2449 // 64-bit
2450 if (masm) {
2451 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2452 #ifndef PRODUCT
2453 } else {
2454 st->print("%s %s, %s\t# spill",
2455 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2456 Matcher::regName[dst_first],
2457 Matcher::regName[src_first]);
2458 #endif
2459 }
2460 } else {
2461 // 32-bit
2462 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2463 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2464 if (masm) {
2465 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2466 #ifndef PRODUCT
2467 } else {
2468 st->print("%s %s, %s\t# spill",
2469 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2470 Matcher::regName[dst_first],
2471 Matcher::regName[src_first]);
2472 #endif
2473 }
2474 }
2475 return 0;
2476 } else if (dst_first_rc == rc_kreg) {
2477 assert(false, "Illegal spilling");
2478 return 0;
2479 }
2480 } else if (src_first_rc == rc_kreg) {
2481 if (dst_first_rc == rc_stack) {
2482 // mem -> kreg
2483 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2484 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2485 // 64-bit
2486 int offset = ra_->reg2offset(dst_first);
2487 if (masm) {
2488 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2489 #ifndef PRODUCT
2490 } else {
2491 st->print("kmovq [rsp + #%d] , %s\t# spill",
2492 offset,
2493 Matcher::regName[src_first]);
2494 #endif
2495 }
2496 }
2497 return 0;
2498 } else if (dst_first_rc == rc_int) {
2499 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2500 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2501 // 64-bit
2502 if (masm) {
2503 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2504 #ifndef PRODUCT
2505 } else {
2506 st->print("kmovq %s, %s\t# spill",
2507 Matcher::regName[dst_first],
2508 Matcher::regName[src_first]);
2509 #endif
2510 }
2511 }
2512 Unimplemented();
2513 return 0;
2514 } else if (dst_first_rc == rc_kreg) {
2515 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2516 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2517 // 64-bit
2518 if (masm) {
2519 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2520 #ifndef PRODUCT
2521 } else {
2522 st->print("kmovq %s, %s\t# spill",
2523 Matcher::regName[dst_first],
2524 Matcher::regName[src_first]);
2525 #endif
2526 }
2527 }
2528 return 0;
2529 } else if (dst_first_rc == rc_float) {
2530 assert(false, "Illegal spill");
2531 return 0;
2532 }
2533 }
2534
2535 assert(0," foo ");
2536 Unimplemented();
2537 return 0;
2538 }
2539
2540 #ifndef PRODUCT
2541 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2542 implementation(nullptr, ra_, false, st);
2543 }
2544 #endif
2545
2546 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2547 implementation(masm, ra_, false, nullptr);
2548 }
2549
2550 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2551 return MachNode::size(ra_);
2552 }
2553
2554 //=============================================================================
2555 #ifndef PRODUCT
2556 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2557 {
2558 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2559 int reg = ra_->get_reg_first(this);
2560 st->print("leaq %s, [rsp + #%d]\t# box lock",
2561 Matcher::regName[reg], offset);
2562 }
2563 #endif
2564
2565 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2566 {
2567 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2568 int reg = ra_->get_encode(this);
2569
2570 __ lea(as_Register(reg), Address(rsp, offset));
2571 }
2572
2573 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2574 {
2575 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2576 if (ra_->get_encode(this) > 15) {
2577 return (offset < 0x80) ? 6 : 9; // REX2
2578 } else {
2579 return (offset < 0x80) ? 5 : 8; // REX
2580 }
2581 }
2582
2583 //=============================================================================
2584 #ifndef PRODUCT
2585 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2586 {
2587 st->print_cr("MachVEPNode");
2588 }
2589 #endif
2590
2591 void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2592 {
2593 CodeBuffer* cbuf = masm->code();
2594 if (!_verified) {
2595 __ ic_check(1);
2596 } else {
2597 if (ra_->C->stub_function() == nullptr) {
2598 // Emit the entry barrier in a temporary frame before unpacking because
2599 // it can deopt, which would require packing the scalarized args again.
2600 __ verified_entry(ra_->C, 0);
2601 __ entry_barrier();
2602 int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
2603 __ remove_frame(initial_framesize, false);
2604 }
2605 // Unpack inline type args passed as oop and then jump to
2606 // the verified entry point (skipping the unverified entry).
2607 int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
2608 // Emit code for verified entry and save increment for stack repair on return
2609 __ verified_entry(ra_->C, sp_inc);
2610 if (Compile::current()->output()->in_scratch_emit_size()) {
2611 Label dummy_verified_entry;
2612 __ jmp(dummy_verified_entry);
2613 } else {
2614 __ jmp(*_verified_entry);
2615 }
2616 }
2617 if (ra_->C->stub_function() == nullptr) {
2618 // Pad so that the next call to MachVEPNode::emit() starts out with the
2619 // correct alignment. This is needed by entry_barrier() to align the
2620 // compare. But unfortunately we need to align all 4 MachVEPNodes because
2621 // entry point offsets are computed using scratch_emit_size(), so starting
2622 // alignment must match the alignment of the scratch buffer, otherwise the sizes
2623 // will be off.
2624 __ align(4);
2625 }
2626 }
2627
2628 //=============================================================================
2629 #ifndef PRODUCT
2630 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2631 {
2632 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2633 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2634 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2635 }
2636 #endif
2637
2638 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2639 {
2640 __ ic_check(InteriorEntryAlignment);
2641 }
2642
2643
2644 //=============================================================================
2645
2646 bool Matcher::supports_vector_calling_convention(void) {
2647 return EnableVectorSupport;
2648 }
2649
2650 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2651 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2652 }
2653
2654 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2655 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2656 }
2657
2658 #ifdef ASSERT
2659 static bool is_ndd_demotable(const MachNode* mdef) {
2660 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2661 }
2662 #endif
2663
2664 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2665 int oper_index) {
2666 if (mdef == nullptr) {
2667 return false;
2668 }
2669
2670 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2671 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2672 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2673 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2674 return false;
2675 }
2676
2677 // Complex memory operand covers multiple incoming edges needed for
2678 // address computation. Biasing def towards any address component will not
2679 // result in NDD demotion by assembler.
2680 if (mdef->operand_num_edges(oper_index) != 1) {
2681 return false;
2682 }
2683
2684 // Demotion candidate must be register mask compatible with definition.
2685 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2686 if (!oper_mask.overlap(mdef->out_RegMask())) {
2687 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2688 return false;
2689 }
2690
2691 switch (oper_index) {
2692 // First operand of MachNode corresponding to Intel APX NDD selection
2693 // pattern can share its assigned register with definition operand if
2694 // their live ranges do not overlap. In such a scenario we can demote
2695 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2696 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2697 // are decorated with a special flag by instruction selector.
2698 case 1:
2699 return is_ndd_demotable_opr1(mdef);
2700
2701 // Definition operand of commutative operation can be biased towards second
2702 // operand.
2703 case 2:
2704 return is_ndd_demotable_opr2(mdef);
2705
2706 // Current scheme only selects up to two biasing candidates
2707 default:
2708 assert(false, "unhandled operand index: %s", mdef->Name());
2709 break;
2710 }
2711
2712 return false;
2713 }
2714
2715 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2716 assert(EnableVectorSupport, "sanity");
2717 int lo = XMM0_num;
2718 int hi = XMM0b_num;
2719 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2720 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2721 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2722 return OptoRegPair(hi, lo);
2723 }
2724
2725 // Is this branch offset short enough that a short branch can be used?
2726 //
2727 // NOTE: If the platform does not provide any short branch variants, then
2728 // this method should return false for offset 0.
2729 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2730 // The passed offset is relative to address of the branch.
2731 // On 86 a branch displacement is calculated relative to address
2732 // of a next instruction.
2733 offset -= br_size;
2734
2735 // the short version of jmpConUCF2 contains multiple branches,
2736 // making the reach slightly less
2737 if (rule == jmpConUCF2_rule)
2738 return (-126 <= offset && offset <= 125);
2739 return (-128 <= offset && offset <= 127);
2740 }
2741
#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
// Both halves of each listed general register and the first two slots of
// XMM0 through XMM7 qualify.
bool Matcher::can_be_java_arg(int reg)
{
  const bool is_general_arg =
    reg == RDI_num || reg == RDI_H_num ||
    reg == RSI_num || reg == RSI_H_num ||
    reg == RDX_num || reg == RDX_H_num ||
    reg == RCX_num || reg == RCX_H_num ||
    reg == R8_num  || reg == R8_H_num  ||
    reg == R9_num  || reg == R9_H_num  ||
    reg == R12_num || reg == R12_H_num;
  if (is_general_arg) {
    return true;
  }

  const bool is_xmm_arg =
    reg == XMM0_num || reg == XMM0b_num ||
    reg == XMM1_num || reg == XMM1b_num ||
    reg == XMM2_num || reg == XMM2b_num ||
    reg == XMM3_num || reg == XMM3b_num ||
    reg == XMM4_num || reg == XMM4b_num ||
    reg == XMM5_num || reg == XMM5b_num ||
    reg == XMM6_num || reg == XMM6b_num ||
    reg == XMM7_num || reg == XMM7b_num;
  return is_xmm_arg;
}
#endif
2764
2765 uint Matcher::int_pressure_limit()
2766 {
2767 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2768 }
2769
2770 uint Matcher::float_pressure_limit()
2771 {
2772 // After experiment around with different values, the following default threshold
2773 // works best for LCM's register pressure scheduling on x64.
2774 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2775 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2776 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2777 }
2778
2779 // Register for DIVI projection of divmodI
2780 const RegMask& Matcher::divI_proj_mask() {
2781 return INT_RAX_REG_mask();
2782 }
2783
2784 // Register for MODI projection of divmodI
2785 const RegMask& Matcher::modI_proj_mask() {
2786 return INT_RDX_REG_mask();
2787 }
2788
2789 // Register for DIVL projection of divmodL
2790 const RegMask& Matcher::divL_proj_mask() {
2791 return LONG_RAX_REG_mask();
2792 }
2793
2794 // Register for MODL projection of divmodL
2795 const RegMask& Matcher::modL_proj_mask() {
2796 return LONG_RDX_REG_mask();
2797 }
2798
2799 %}
2800
2801 source_hpp %{
2802 // Header information of the source block.
2803 // Method declarations/definitions which are used outside
2804 // the ad-scope can conveniently be defined here.
2805 //
2806 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
2808
2809 #include "runtime/vm_version.hpp"
2810
2811 class NativeJump;
2812
2813 class CallStubImpl {
2814
2815 //--------------------------------------------------------------
2816 //---< Used for optimization in Compile::shorten_branches >---
2817 //--------------------------------------------------------------
2818
2819 public:
2820 // Size of call trampoline stub.
2821 static uint size_call_trampoline() {
2822 return 0; // no call trampolines on this platform
2823 }
2824
2825 // number of relocations needed by a call trampoline stub
2826 static uint reloc_call_trampoline() {
2827 return 0; // no call trampolines on this platform
2828 }
2829 };
2830
2831 class HandlerImpl {
2832
2833 public:
2834
2835 static int emit_deopt_handler(C2_MacroAssembler* masm);
2836
2837 static uint size_deopt_handler() {
2838 // one call and one jmp.
2839 return 7;
2840 }
2841 };
2842
2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2844 switch(bytes) {
2845 case 4: // fall-through
2846 case 8: // fall-through
2847 case 16: return Assembler::AVX_128bit;
2848 case 32: return Assembler::AVX_256bit;
2849 case 64: return Assembler::AVX_512bit;
2850
2851 default: {
2852 ShouldNotReachHere();
2853 return Assembler::AVX_NoVec;
2854 }
2855 }
2856 }
2857
2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2859 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2860 }
2861
2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2863 uint def_idx = use->operand_index(opnd);
2864 Node* def = use->in(def_idx);
2865 return vector_length_encoding(def);
2866 }
2867
2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
2869 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2870 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2871 }
2872
2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2874 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2875 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2876 }
2877
2878 class Node::PD {
2879 public:
2880 enum NodeFlags : uint64_t {
2881 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2882 Flag_sets_carry_flag = Node::_last_flag << 2,
2883 Flag_sets_parity_flag = Node::_last_flag << 3,
2884 Flag_sets_zero_flag = Node::_last_flag << 4,
2885 Flag_sets_overflow_flag = Node::_last_flag << 5,
2886 Flag_sets_sign_flag = Node::_last_flag << 6,
2887 Flag_clears_carry_flag = Node::_last_flag << 7,
2888 Flag_clears_parity_flag = Node::_last_flag << 8,
2889 Flag_clears_zero_flag = Node::_last_flag << 9,
2890 Flag_clears_overflow_flag = Node::_last_flag << 10,
2891 Flag_clears_sign_flag = Node::_last_flag << 11,
2892 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2893 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2894 _last_flag = Flag_ndd_demotable_opr2
2895 };
2896 };
2897
2898 %} // end source_hpp
2899
2900 source %{
2901
2902 #include "opto/addnode.hpp"
2903 #include "c2_intelJccErratum_x86.hpp"
2904
2905 void PhaseOutput::pd_perform_mach_node_analysis() {
2906 if (VM_Version::has_intel_jcc_erratum()) {
2907 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2908 _buf_sizes._code += extra_padding;
2909 }
2910 }
2911
2912 int MachNode::pd_alignment_required() const {
2913 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2914 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2915 return IntelJccErratum::largest_jcc_size() + 1;
2916 } else {
2917 return 1;
2918 }
2919 }
2920
2921 int MachNode::compute_padding(int current_offset) const {
2922 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2923 Compile* C = Compile::current();
2924 PhaseOutput* output = C->output();
2925 Block* block = output->block();
2926 int index = output->index();
2927 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2928 } else {
2929 return 0;
2930 }
2931 }
2932
2933 // Emit deopt handler code.
2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2935
2936 // Note that the code buffer's insts_mark is always relative to insts.
2937 // That's why we must use the macroassembler to generate a handler.
2938 address base = __ start_a_stub(size_deopt_handler());
2939 if (base == nullptr) {
2940 ciEnv::current()->record_failure("CodeCache is full");
2941 return 0; // CodeBuffer::expand failed
2942 }
2943 int offset = __ offset();
2944
2945 Label start;
2946 __ bind(start);
2947
2948 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2949
2950 int entry_offset = __ offset();
2951
2952 __ jmp(start);
2953
2954 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2955 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2956 "out of bounds read in post-call NOP check");
2957 __ end_a_stub();
2958 return entry_offset;
2959 }
2960
2961 static Assembler::Width widthForType(BasicType bt) {
2962 if (bt == T_BYTE) {
2963 return Assembler::B;
2964 } else if (bt == T_SHORT) {
2965 return Assembler::W;
2966 } else if (bt == T_INT) {
2967 return Assembler::D;
2968 } else {
2969 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2970 return Assembler::Q;
2971 }
2972 }
2973
2974 //=============================================================================
2975
2976 // Float masks come from different places depending on platform.
2977 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2978 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2979 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2980 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2981 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2982 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2983 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2984 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2985 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2986 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2987 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2988 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2989 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2990 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2991 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2992 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2993 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2994 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2995 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2996
2997 //=============================================================================
2998 bool Matcher::match_rule_supported(int opcode) {
2999 if (!has_match_rule(opcode)) {
3000 return false; // no match rule present
3001 }
3002 switch (opcode) {
3003 case Op_AbsVL:
3004 case Op_StoreVectorScatter:
3005 if (UseAVX < 3) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountI:
3010 case Op_PopCountL:
3011 if (!UsePopCountInstruction) {
3012 return false;
3013 }
3014 break;
3015 case Op_PopCountVI:
3016 if (UseAVX < 2) {
3017 return false;
3018 }
3019 break;
3020 case Op_CompressV:
3021 case Op_ExpandV:
3022 case Op_PopCountVL:
3023 if (UseAVX < 2) {
3024 return false;
3025 }
3026 break;
3027 case Op_MulVI:
3028 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3029 return false;
3030 }
3031 break;
3032 case Op_MulVL:
3033 if (UseSSE < 4) { // only with SSE4_1 or AVX
3034 return false;
3035 }
3036 break;
3037 case Op_MulReductionVL:
3038 if (VM_Version::supports_avx512dq() == false) {
3039 return false;
3040 }
3041 break;
3042 case Op_AbsVB:
3043 case Op_AbsVS:
3044 case Op_AbsVI:
3045 case Op_AddReductionVI:
3046 case Op_AndReductionV:
3047 case Op_OrReductionV:
3048 case Op_XorReductionV:
3049 if (UseSSE < 3) { // requires at least SSSE3
3050 return false;
3051 }
3052 break;
3053 case Op_MaxHF:
3054 case Op_MinHF:
3055 if (!VM_Version::supports_avx512vlbw()) {
3056 return false;
3057 } // fallthrough
3058 case Op_AddHF:
3059 case Op_DivHF:
3060 case Op_FmaHF:
3061 case Op_MulHF:
3062 case Op_ReinterpretS2HF:
3063 case Op_ReinterpretHF2S:
3064 case Op_SubHF:
3065 case Op_SqrtHF:
3066 if (!VM_Version::supports_avx512_fp16()) {
3067 return false;
3068 }
3069 break;
3070 case Op_VectorLoadShuffle:
3071 case Op_VectorRearrange:
3072 case Op_MulReductionVI:
3073 if (UseSSE < 4) { // requires at least SSE4
3074 return false;
3075 }
3076 break;
3077 case Op_IsInfiniteF:
3078 case Op_IsInfiniteD:
3079 if (!VM_Version::supports_avx512dq()) {
3080 return false;
3081 }
3082 break;
3083 case Op_SqrtVD:
3084 case Op_SqrtVF:
3085 case Op_VectorMaskCmp:
3086 case Op_VectorCastB2X:
3087 case Op_VectorCastS2X:
3088 case Op_VectorCastI2X:
3089 case Op_VectorCastL2X:
3090 case Op_VectorCastF2X:
3091 case Op_VectorCastD2X:
3092 case Op_VectorUCastB2X:
3093 case Op_VectorUCastS2X:
3094 case Op_VectorUCastI2X:
3095 case Op_VectorMaskCast:
3096 if (UseAVX < 1) { // enabled for AVX only
3097 return false;
3098 }
3099 break;
3100 case Op_PopulateIndex:
3101 if (UseAVX < 2) {
3102 return false;
3103 }
3104 break;
3105 case Op_RoundVF:
3106 if (UseAVX < 2) { // enabled for AVX2 only
3107 return false;
3108 }
3109 break;
3110 case Op_RoundVD:
3111 if (UseAVX < 3) {
3112 return false; // enabled for AVX3 only
3113 }
3114 break;
3115 case Op_CompareAndSwapL:
3116 case Op_CompareAndSwapP:
3117 break;
3118 case Op_StrIndexOf:
3119 if (!UseSSE42Intrinsics) {
3120 return false;
3121 }
3122 break;
3123 case Op_StrIndexOfChar:
3124 if (!UseSSE42Intrinsics) {
3125 return false;
3126 }
3127 break;
3128 case Op_OnSpinWait:
3129 if (VM_Version::supports_on_spin_wait() == false) {
3130 return false;
3131 }
3132 break;
3133 case Op_MulVB:
3134 case Op_LShiftVB:
3135 case Op_RShiftVB:
3136 case Op_URShiftVB:
3137 case Op_VectorInsert:
3138 case Op_VectorLoadMask:
3139 case Op_VectorStoreMask:
3140 case Op_VectorBlend:
3141 if (UseSSE < 4) {
3142 return false;
3143 }
3144 break;
3145 case Op_MaxD:
3146 case Op_MaxF:
3147 case Op_MinD:
3148 case Op_MinF:
3149 if (UseAVX < 1) { // enabled for AVX only
3150 return false;
3151 }
3152 break;
3153 case Op_CacheWB:
3154 case Op_CacheWBPreSync:
3155 case Op_CacheWBPostSync:
3156 if (!VM_Version::supports_data_cache_line_flush()) {
3157 return false;
3158 }
3159 break;
3160 case Op_ExtractB:
3161 case Op_ExtractL:
3162 case Op_ExtractI:
3163 case Op_RoundDoubleMode:
3164 if (UseSSE < 4) {
3165 return false;
3166 }
3167 break;
3168 case Op_RoundDoubleModeV:
3169 if (VM_Version::supports_avx() == false) {
3170 return false; // 128bit vroundpd is not available
3171 }
3172 break;
3173 case Op_LoadVectorGather:
3174 case Op_LoadVectorGatherMasked:
3175 if (UseAVX < 2) {
3176 return false;
3177 }
3178 break;
3179 case Op_FmaF:
3180 case Op_FmaD:
3181 case Op_FmaVD:
3182 case Op_FmaVF:
3183 if (!UseFMA) {
3184 return false;
3185 }
3186 break;
3187 case Op_MacroLogicV:
3188 if (UseAVX < 3 || !UseVectorMacroLogic) {
3189 return false;
3190 }
3191 break;
3192
3193 case Op_VectorCmpMasked:
3194 case Op_VectorMaskGen:
3195 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3196 return false;
3197 }
3198 break;
3199 case Op_VectorMaskFirstTrue:
3200 case Op_VectorMaskLastTrue:
3201 case Op_VectorMaskTrueCount:
3202 case Op_VectorMaskToLong:
3203 if (UseAVX < 1) {
3204 return false;
3205 }
3206 break;
3207 case Op_RoundF:
3208 case Op_RoundD:
3209 break;
3210 case Op_CopySignD:
3211 case Op_CopySignF:
3212 if (UseAVX < 3) {
3213 return false;
3214 }
3215 if (!VM_Version::supports_avx512vl()) {
3216 return false;
3217 }
3218 break;
3219 case Op_CompressBits:
3220 case Op_ExpandBits:
3221 if (!VM_Version::supports_bmi2()) {
3222 return false;
3223 }
3224 break;
3225 case Op_CompressM:
3226 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3227 return false;
3228 }
3229 break;
3230 case Op_ConvF2HF:
3231 case Op_ConvHF2F:
3232 if (!VM_Version::supports_float16()) {
3233 return false;
3234 }
3235 break;
3236 case Op_VectorCastF2HF:
3237 case Op_VectorCastHF2F:
3238 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3239 return false;
3240 }
3241 break;
3242 }
3243 return true; // Match rules are supported by default.
3244 }
3245
3246 //------------------------------------------------------------------------
3247
3248 static inline bool is_pop_count_instr_target(BasicType bt) {
3249 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3250 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3251 }
3252
3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3254 return match_rule_supported_vector(opcode, vlen, bt);
3255 }
3256
3257 // Identify extra cases that we might want to provide match rules for vector nodes and
3258 // other intrinsics guarded with vector length (vlen) and element type (bt).
3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3260 if (!match_rule_supported(opcode)) {
3261 return false;
3262 }
3263 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3264 // * SSE2 supports 128bit vectors for all types;
3265 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3266 // * AVX2 supports 256bit vectors for all types;
3267 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3268 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3269 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3270 // And MaxVectorSize is taken into account as well.
3271 if (!vector_size_supported(bt, vlen)) {
3272 return false;
3273 }
3274 // Special cases which require vector length follow:
3275 // * implementation limitations
3276 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3277 // * 128bit vroundpd instruction is present only in AVX1
3278 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3279 switch (opcode) {
3280 case Op_MaxVHF:
3281 case Op_MinVHF:
3282 if (!VM_Version::supports_avx512bw()) {
3283 return false;
3284 }
3285 case Op_AddVHF:
3286 case Op_DivVHF:
3287 case Op_FmaVHF:
3288 case Op_MulVHF:
3289 case Op_SubVHF:
3290 case Op_SqrtVHF:
3291 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3292 return false;
3293 }
3294 if (!VM_Version::supports_avx512_fp16()) {
3295 return false;
3296 }
3297 break;
3298 case Op_AbsVF:
3299 case Op_NegVF:
3300 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3301 return false; // 512bit vandps and vxorps are not available
3302 }
3303 break;
3304 case Op_AbsVD:
3305 case Op_NegVD:
3306 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3307 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3308 }
3309 break;
3310 case Op_RotateRightV:
3311 case Op_RotateLeftV:
3312 if (bt != T_INT && bt != T_LONG) {
3313 return false;
3314 } // fallthrough
3315 case Op_MacroLogicV:
3316 if (!VM_Version::supports_evex() ||
3317 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3318 return false;
3319 }
3320 break;
3321 case Op_ClearArray:
3322 case Op_VectorMaskGen:
3323 case Op_VectorCmpMasked:
3324 if (!VM_Version::supports_avx512bw()) {
3325 return false;
3326 }
3327 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3328 return false;
3329 }
3330 break;
3331 case Op_LoadVectorMasked:
3332 case Op_StoreVectorMasked:
3333 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3334 return false;
3335 }
3336 break;
3337 case Op_UMinV:
3338 case Op_UMaxV:
3339 if (UseAVX == 0) {
3340 return false;
3341 }
3342 break;
3343 case Op_UMinReductionV:
3344 case Op_UMaxReductionV:
3345 if (UseAVX == 0) {
3346 return false;
3347 }
3348 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3349 return false;
3350 }
3351 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3352 return false;
3353 }
3354 break;
3355 case Op_MaxV:
3356 case Op_MinV:
3357 if (UseSSE < 4 && is_integral_type(bt)) {
3358 return false;
3359 }
3360 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3361 // Float/Double intrinsics are enabled for AVX family currently.
3362 if (UseAVX == 0) {
3363 return false;
3364 }
3365 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3366 return false;
3367 }
3368 }
3369 break;
3370 case Op_CallLeafVector:
3371 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3372 return false;
3373 }
3374 break;
3375 case Op_AddReductionVI:
3376 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3377 return false;
3378 }
3379 // fallthrough
3380 case Op_AndReductionV:
3381 case Op_OrReductionV:
3382 case Op_XorReductionV:
3383 if (is_subword_type(bt) && (UseSSE < 4)) {
3384 return false;
3385 }
3386 break;
3387 case Op_MinReductionV:
3388 case Op_MaxReductionV:
3389 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3390 return false;
3391 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3392 return false;
3393 }
3394 // Float/Double intrinsics enabled for AVX family.
3395 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3396 return false;
3397 }
3398 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3399 return false;
3400 }
3401 break;
3402 case Op_VectorBlend:
3403 if (UseAVX == 0 && size_in_bits < 128) {
3404 return false;
3405 }
3406 break;
3407 case Op_VectorTest:
3408 if (UseSSE < 4) {
3409 return false; // Implementation limitation
3410 } else if (size_in_bits < 32) {
3411 return false; // Implementation limitation
3412 }
3413 break;
3414 case Op_VectorLoadShuffle:
3415 case Op_VectorRearrange:
3416 if(vlen == 2) {
3417 return false; // Implementation limitation due to how shuffle is loaded
3418 } else if (size_in_bits == 256 && UseAVX < 2) {
3419 return false; // Implementation limitation
3420 }
3421 break;
3422 case Op_VectorLoadMask:
3423 case Op_VectorMaskCast:
3424 if (size_in_bits == 256 && UseAVX < 2) {
3425 return false; // Implementation limitation
3426 }
3427 // fallthrough
3428 case Op_VectorStoreMask:
3429 if (vlen == 2) {
3430 return false; // Implementation limitation
3431 }
3432 break;
3433 case Op_PopulateIndex:
3434 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3435 return false;
3436 }
3437 break;
3438 case Op_VectorCastB2X:
3439 case Op_VectorCastS2X:
3440 case Op_VectorCastI2X:
3441 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3442 return false;
3443 }
3444 break;
3445 case Op_VectorCastL2X:
3446 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3447 return false;
3448 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3449 return false;
3450 }
3451 break;
3452 case Op_VectorCastF2X: {
3453 // As per JLS section 5.1.3 narrowing conversion to sub-word types
3454 // happen after intermediate conversion to integer and special handling
3455 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
3456 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3457 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3458 return false;
3459 }
3460 }
3461 // fallthrough
3462 case Op_VectorCastD2X:
3463 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3464 return false;
3465 }
3466 break;
3467 case Op_VectorCastF2HF:
3468 case Op_VectorCastHF2F:
3469 if (!VM_Version::supports_f16c() &&
3470 ((!VM_Version::supports_evex() ||
3471 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3472 return false;
3473 }
3474 break;
3475 case Op_RoundVD:
3476 if (!VM_Version::supports_avx512dq()) {
3477 return false;
3478 }
3479 break;
3480 case Op_MulReductionVI:
3481 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3482 return false;
3483 }
3484 break;
3485 case Op_LoadVectorGatherMasked:
3486 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3487 return false;
3488 }
3489 if (is_subword_type(bt) &&
3490 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3491 (size_in_bits < 64) ||
3492 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3493 return false;
3494 }
3495 break;
3496 case Op_StoreVectorScatterMasked:
3497 case Op_StoreVectorScatter:
3498 if (is_subword_type(bt)) {
3499 return false;
3500 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3501 return false;
3502 }
3503 // fallthrough
3504 case Op_LoadVectorGather:
3505 if (!is_subword_type(bt) && size_in_bits == 64) {
3506 return false;
3507 }
3508 if (is_subword_type(bt) && size_in_bits < 64) {
3509 return false;
3510 }
3511 break;
3512 case Op_SaturatingAddV:
3513 case Op_SaturatingSubV:
3514 if (UseAVX < 1) {
3515 return false; // Implementation limitation
3516 }
3517 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3518 return false;
3519 }
3520 break;
3521 case Op_SelectFromTwoVector:
3522 if (size_in_bits < 128) {
3523 return false;
3524 }
3525 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3526 return false;
3527 }
3528 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3529 return false;
3530 }
3531 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3532 return false;
3533 }
3534 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3535 return false;
3536 }
3537 break;
3538 case Op_MaskAll:
3539 if (!VM_Version::supports_evex()) {
3540 return false;
3541 }
3542 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3543 return false;
3544 }
3545 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3546 return false;
3547 }
3548 break;
3549 case Op_VectorMaskCmp:
3550 if (vlen < 2 || size_in_bits < 32) {
3551 return false;
3552 }
3553 break;
3554 case Op_CompressM:
3555 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3556 return false;
3557 }
3558 break;
3559 case Op_CompressV:
3560 case Op_ExpandV:
3561 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3562 return false;
3563 }
3564 if (size_in_bits < 128 ) {
3565 return false;
3566 }
3567 case Op_VectorLongToMask:
3568 if (UseAVX < 1) {
3569 return false;
3570 }
3571 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3572 return false;
3573 }
3574 break;
3575 case Op_SignumVD:
3576 case Op_SignumVF:
3577 if (UseAVX < 1) {
3578 return false;
3579 }
3580 break;
3581 case Op_PopCountVI:
3582 case Op_PopCountVL: {
3583 if (!is_pop_count_instr_target(bt) &&
3584 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3585 return false;
3586 }
3587 }
3588 break;
3589 case Op_ReverseV:
3590 case Op_ReverseBytesV:
3591 if (UseAVX < 2) {
3592 return false;
3593 }
3594 break;
3595 case Op_CountTrailingZerosV:
3596 case Op_CountLeadingZerosV:
3597 if (UseAVX < 2) {
3598 return false;
3599 }
3600 break;
3601 }
3602 return true; // Per default match rules are supported.
3603 }
3604
3605 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3606 // ADLC based match_rule_supported routine checks for the existence of pattern based
3607 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3608 // of their non-masked counterpart with mask edge being the differentiator.
3609 // This routine does a strict check on the existence of masked operation patterns
3610 // by returning a default false value for all the other opcodes apart from the
3611 // ones whose masked instruction patterns are defined in this file.
3612 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3613 return false;
3614 }
3615
3616 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3617 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3618 return false;
3619 }
3620 switch(opcode) {
3621 // Unary masked operations
3622 case Op_AbsVB:
3623 case Op_AbsVS:
3624 if(!VM_Version::supports_avx512bw()) {
3625 return false; // Implementation limitation
3626 }
3627 case Op_AbsVI:
3628 case Op_AbsVL:
3629 return true;
3630
3631 // Ternary masked operations
3632 case Op_FmaVF:
3633 case Op_FmaVD:
3634 return true;
3635
3636 case Op_MacroLogicV:
3637 if(bt != T_INT && bt != T_LONG) {
3638 return false;
3639 }
3640 return true;
3641
3642 // Binary masked operations
3643 case Op_AddVB:
3644 case Op_AddVS:
3645 case Op_SubVB:
3646 case Op_SubVS:
3647 case Op_MulVS:
3648 case Op_LShiftVS:
3649 case Op_RShiftVS:
3650 case Op_URShiftVS:
3651 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3652 if (!VM_Version::supports_avx512bw()) {
3653 return false; // Implementation limitation
3654 }
3655 return true;
3656
3657 case Op_MulVL:
3658 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3659 if (!VM_Version::supports_avx512dq()) {
3660 return false; // Implementation limitation
3661 }
3662 return true;
3663
3664 case Op_AndV:
3665 case Op_OrV:
3666 case Op_XorV:
3667 case Op_RotateRightV:
3668 case Op_RotateLeftV:
3669 if (bt != T_INT && bt != T_LONG) {
3670 return false; // Implementation limitation
3671 }
3672 return true;
3673
3674 case Op_VectorLoadMask:
3675 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3676 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3677 return false;
3678 }
3679 return true;
3680
3681 case Op_AddVI:
3682 case Op_AddVL:
3683 case Op_AddVF:
3684 case Op_AddVD:
3685 case Op_SubVI:
3686 case Op_SubVL:
3687 case Op_SubVF:
3688 case Op_SubVD:
3689 case Op_MulVI:
3690 case Op_MulVF:
3691 case Op_MulVD:
3692 case Op_DivVF:
3693 case Op_DivVD:
3694 case Op_SqrtVF:
3695 case Op_SqrtVD:
3696 case Op_LShiftVI:
3697 case Op_LShiftVL:
3698 case Op_RShiftVI:
3699 case Op_RShiftVL:
3700 case Op_URShiftVI:
3701 case Op_URShiftVL:
3702 case Op_LoadVectorMasked:
3703 case Op_StoreVectorMasked:
3704 case Op_LoadVectorGatherMasked:
3705 case Op_StoreVectorScatterMasked:
3706 return true;
3707
3708 case Op_UMinV:
3709 case Op_UMaxV:
3710 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3711 return false;
3712 } // fallthrough
3713 case Op_MaxV:
3714 case Op_MinV:
3715 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3716 return false; // Implementation limitation
3717 }
3718 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3719 return false; // Implementation limitation
3720 }
3721 return true;
3722 case Op_SaturatingAddV:
3723 case Op_SaturatingSubV:
3724 if (!is_subword_type(bt)) {
3725 return false;
3726 }
3727 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 return true;
3731
3732 case Op_VectorMaskCmp:
3733 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3734 return false; // Implementation limitation
3735 }
3736 return true;
3737
3738 case Op_VectorRearrange:
3739 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3740 return false; // Implementation limitation
3741 }
3742 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3743 return false; // Implementation limitation
3744 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3745 return false; // Implementation limitation
3746 }
3747 return true;
3748
3749 // Binary Logical operations
3750 case Op_AndVMask:
3751 case Op_OrVMask:
3752 case Op_XorVMask:
3753 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3754 return false; // Implementation limitation
3755 }
3756 return true;
3757
3758 case Op_PopCountVI:
3759 case Op_PopCountVL:
3760 if (!is_pop_count_instr_target(bt)) {
3761 return false;
3762 }
3763 return true;
3764
3765 case Op_MaskAll:
3766 return true;
3767
3768 case Op_CountLeadingZerosV:
3769 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3770 return true;
3771 }
3772 default:
3773 return false;
3774 }
3775 }
3776
3777 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3778 return false;
3779 }
3780
3781 // Return true if Vector::rearrange needs preparation of the shuffle argument
3782 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3783 switch (elem_bt) {
3784 case T_BYTE: return false;
3785 case T_SHORT: return !VM_Version::supports_avx512bw();
3786 case T_INT: return !VM_Version::supports_avx();
3787 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3788 default:
3789 ShouldNotReachHere();
3790 return false;
3791 }
3792 }
3793
3794 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3795 // Prefer predicate if the mask type is "TypePVectMask".
3796 return vt->isa_pvectmask() != nullptr;
3797 }
3798
3799 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3800 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3801 bool legacy = (generic_opnd->opcode() == LEGVEC);
3802 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3803 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3804 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3805 return new legVecZOper();
3806 }
3807 if (legacy) {
3808 switch (ideal_reg) {
3809 case Op_VecS: return new legVecSOper();
3810 case Op_VecD: return new legVecDOper();
3811 case Op_VecX: return new legVecXOper();
3812 case Op_VecY: return new legVecYOper();
3813 case Op_VecZ: return new legVecZOper();
3814 }
3815 } else {
3816 switch (ideal_reg) {
3817 case Op_VecS: return new vecSOper();
3818 case Op_VecD: return new vecDOper();
3819 case Op_VecX: return new vecXOper();
3820 case Op_VecY: return new vecYOper();
3821 case Op_VecZ: return new vecZOper();
3822 }
3823 }
3824 ShouldNotReachHere();
3825 return nullptr;
3826 }
3827
3828 bool Matcher::is_reg2reg_move(MachNode* m) {
3829 switch (m->rule()) {
3830 case MoveVec2Leg_rule:
3831 case MoveLeg2Vec_rule:
3832 case MoveF2VL_rule:
3833 case MoveF2LEG_rule:
3834 case MoveVL2F_rule:
3835 case MoveLEG2F_rule:
3836 case MoveD2VL_rule:
3837 case MoveD2LEG_rule:
3838 case MoveVL2D_rule:
3839 case MoveLEG2D_rule:
3840 return true;
3841 default:
3842 return false;
3843 }
3844 }
3845
3846 bool Matcher::is_generic_vector(MachOper* opnd) {
3847 switch (opnd->opcode()) {
3848 case VEC:
3849 case LEGVEC:
3850 return true;
3851 default:
3852 return false;
3853 }
3854 }
3855
3856 //------------------------------------------------------------------------
3857
3858 const RegMask* Matcher::predicate_reg_mask(void) {
3859 return &_VECTMASK_REG_mask;
3860 }
3861
3862 // Max vector size in bytes. 0 if not supported.
3863 int Matcher::vector_width_in_bytes(BasicType bt) {
3864 assert(is_java_primitive(bt), "only primitive type vectors");
3865 // SSE2 supports 128bit vectors for all types.
3866 // AVX2 supports 256bit vectors for all types.
3867 // AVX2/EVEX supports 512bit vectors for all types.
3868 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3869 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3870 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3871 size = (UseAVX > 2) ? 64 : 32;
3872 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3873 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3874 // Use flag to limit vector size.
3875 size = MIN2(size,(int)MaxVectorSize);
3876 // Minimum 2 values in vector (or 4 for bytes).
3877 switch (bt) {
3878 case T_DOUBLE:
3879 case T_LONG:
3880 if (size < 16) return 0;
3881 break;
3882 case T_FLOAT:
3883 case T_INT:
3884 if (size < 8) return 0;
3885 break;
3886 case T_BOOLEAN:
3887 if (size < 4) return 0;
3888 break;
3889 case T_CHAR:
3890 if (size < 4) return 0;
3891 break;
3892 case T_BYTE:
3893 if (size < 4) return 0;
3894 break;
3895 case T_SHORT:
3896 if (size < 4) return 0;
3897 break;
3898 default:
3899 ShouldNotReachHere();
3900 }
3901 return size;
3902 }
3903
3904 // Limits on vector size (number of elements) loaded into vector.
3905 int Matcher::max_vector_size(const BasicType bt) {
3906 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3907 }
3908 int Matcher::min_vector_size(const BasicType bt) {
3909 int max_size = max_vector_size(bt);
3910 // Min size which can be loaded into vector is 4 bytes.
3911 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3912 // Support for calling svml double64 vectors
3913 if (bt == T_DOUBLE) {
3914 size = 1;
3915 }
3916 return MIN2(size,max_size);
3917 }
3918
3919 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3920 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3921 // by default on Cascade Lake
3922 if (VM_Version::is_default_intel_cascade_lake()) {
3923 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3924 }
3925 return Matcher::max_vector_size(bt);
3926 }
3927
3928 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3929 return -1;
3930 }
3931
3932 // Vector ideal reg corresponding to specified size in bytes
3933 uint Matcher::vector_ideal_reg(int size) {
3934 assert(MaxVectorSize >= size, "");
3935 switch(size) {
3936 case 4: return Op_VecS;
3937 case 8: return Op_VecD;
3938 case 16: return Op_VecX;
3939 case 32: return Op_VecY;
3940 case 64: return Op_VecZ;
3941 }
3942 ShouldNotReachHere();
3943 return 0;
3944 }
3945
3946 // Check for shift by small constant as well
3947 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3948 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3949 shift->in(2)->get_int() <= 3 &&
3950 // Are there other uses besides address expressions?
3951 !matcher->is_visited(shift)) {
3952 address_visited.set(shift->_idx); // Flag as address_visited
3953 mstack.push(shift->in(2), Matcher::Visit);
3954 Node *conv = shift->in(1);
3955 // Allow Matcher to match the rule which bypass
3956 // ConvI2L operation for an array index on LP64
3957 // if the index value is positive.
3958 if (conv->Opcode() == Op_ConvI2L &&
3959 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3960 // Are there other uses besides address expressions?
3961 !matcher->is_visited(conv)) {
3962 address_visited.set(conv->_idx); // Flag as address_visited
3963 mstack.push(conv->in(1), Matcher::Pre_Visit);
3964 } else {
3965 mstack.push(conv, Matcher::Pre_Visit);
3966 }
3967 return true;
3968 }
3969 return false;
3970 }
3971
3972 // This function identifies sub-graphs in which a 'load' node is
3973 // input to two different nodes, and such that it can be matched
3974 // with BMI instructions like blsi, blsr, etc.
3975 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3976 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3977 // refers to the same node.
3978 //
3979 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3980 // This is a temporary solution until we make DAGs expressible in ADL.
3981 template<typename ConType>
3982 class FusedPatternMatcher {
3983 Node* _op1_node;
3984 Node* _mop_node;
3985 int _con_op;
3986
3987 static int match_next(Node* n, int next_op, int next_op_idx) {
3988 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3989 return -1;
3990 }
3991
3992 if (next_op_idx == -1) { // n is commutative, try rotations
3993 if (n->in(1)->Opcode() == next_op) {
3994 return 1;
3995 } else if (n->in(2)->Opcode() == next_op) {
3996 return 2;
3997 }
3998 } else {
3999 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
4000 if (n->in(next_op_idx)->Opcode() == next_op) {
4001 return next_op_idx;
4002 }
4003 }
4004 return -1;
4005 }
4006
4007 public:
4008 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4009 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4010
4011 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4012 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4013 typename ConType::NativeType con_value) {
4014 if (_op1_node->Opcode() != op1) {
4015 return false;
4016 }
4017 if (_mop_node->outcnt() > 2) {
4018 return false;
4019 }
4020 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4021 if (op1_op2_idx == -1) {
4022 return false;
4023 }
4024 // Memory operation must be the other edge
4025 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4026
4027 // Check that the mop node is really what we want
4028 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4029 Node* op2_node = _op1_node->in(op1_op2_idx);
4030 if (op2_node->outcnt() > 1) {
4031 return false;
4032 }
4033 assert(op2_node->Opcode() == op2, "Should be");
4034 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4035 if (op2_con_idx == -1) {
4036 return false;
4037 }
4038 // Memory operation must be the other edge
4039 int op2_mop_idx = (op2_con_idx & 1) + 1;
4040 // Check that the memory operation is the same node
4041 if (op2_node->in(op2_mop_idx) == _mop_node) {
4042 // Now check the constant
4043 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4044 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4045 return true;
4046 }
4047 }
4048 }
4049 return false;
4050 }
4051 };
4052
4053 static bool is_bmi_pattern(Node* n, Node* m) {
4054 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4055 if (n != nullptr && m != nullptr) {
4056 if (m->Opcode() == Op_LoadI) {
4057 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4058 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4059 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4060 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4061 } else if (m->Opcode() == Op_LoadL) {
4062 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4063 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4064 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4065 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4066 }
4067 }
4068 return false;
4069 }
4070
4071 // Should the matcher clone input 'm' of node 'n'?
4072 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4073 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4074 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4075 mstack.push(m, Visit);
4076 return true;
4077 }
4078 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4079 mstack.push(m, Visit); // m = ShiftCntV
4080 return true;
4081 }
4082 if (is_encode_and_store_pattern(n, m)) {
4083 mstack.push(m, Visit);
4084 return true;
4085 }
4086 return false;
4087 }
4088
4089 // Should the Matcher clone shifts on addressing modes, expecting them
4090 // to be subsumed into complex addressing expressions or compute them
4091 // into registers?
4092 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4093 Node *off = m->in(AddPNode::Offset);
4094 if (off->is_Con()) {
4095 address_visited.test_set(m->_idx); // Flag as address_visited
4096 Node *adr = m->in(AddPNode::Address);
4097
4098 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4099 // AtomicAdd is not an addressing expression.
4100 // Cheap to find it by looking for screwy base.
4101 if (adr->is_AddP() &&
4102 !adr->in(AddPNode::Base)->is_top() &&
4103 !adr->in(AddPNode::Offset)->is_Con() &&
4104 off->get_long() == (int) (off->get_long()) && // immL32
4105 // Are there other uses besides address expressions?
4106 !is_visited(adr)) {
4107 address_visited.set(adr->_idx); // Flag as address_visited
4108 Node *shift = adr->in(AddPNode::Offset);
4109 if (!clone_shift(shift, this, mstack, address_visited)) {
4110 mstack.push(shift, Pre_Visit);
4111 }
4112 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4113 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4114 } else {
4115 mstack.push(adr, Pre_Visit);
4116 }
4117
4118 // Clone X+offset as it also folds into most addressing expressions
4119 mstack.push(off, Visit);
4120 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4121 return true;
4122 } else if (clone_shift(off, this, mstack, address_visited)) {
4123 address_visited.test_set(m->_idx); // Flag as address_visited
4124 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4125 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4126 return true;
4127 }
4128 return false;
4129 }
4130
4131 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4132 switch (bt) {
4133 case BoolTest::eq:
4134 return Assembler::eq;
4135 case BoolTest::ne:
4136 return Assembler::neq;
4137 case BoolTest::le:
4138 case BoolTest::ule:
4139 return Assembler::le;
4140 case BoolTest::ge:
4141 case BoolTest::uge:
4142 return Assembler::nlt;
4143 case BoolTest::lt:
4144 case BoolTest::ult:
4145 return Assembler::lt;
4146 case BoolTest::gt:
4147 case BoolTest::ugt:
4148 return Assembler::nle;
4149 default : ShouldNotReachHere(); return Assembler::_false;
4150 }
4151 }
4152
4153 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4154 switch (bt) {
4155 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4156 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4157 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4158 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4159 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4160 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4161 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4162 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4163 }
4164 }
4165
4166 // Helper methods for MachSpillCopyNode::implementation().
4167 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4168 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4169 assert(ireg == Op_VecS || // 32bit vector
4170 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4171 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4172 "no non-adjacent vector moves" );
4173 if (masm) {
4174 switch (ireg) {
4175 case Op_VecS: // copy whole register
4176 case Op_VecD:
4177 case Op_VecX:
4178 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4179 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4180 } else {
4181 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4182 }
4183 break;
4184 case Op_VecY:
4185 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4186 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4187 } else {
4188 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4189 }
4190 break;
4191 case Op_VecZ:
4192 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4193 break;
4194 default:
4195 ShouldNotReachHere();
4196 }
4197 #ifndef PRODUCT
4198 } else {
4199 switch (ireg) {
4200 case Op_VecS:
4201 case Op_VecD:
4202 case Op_VecX:
4203 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4204 break;
4205 case Op_VecY:
4206 case Op_VecZ:
4207 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4208 break;
4209 default:
4210 ShouldNotReachHere();
4211 }
4212 #endif
4213 }
4214 }
4215
4216 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4217 int stack_offset, int reg, uint ireg, outputStream* st) {
4218 if (masm) {
4219 if (is_load) {
4220 switch (ireg) {
4221 case Op_VecS:
4222 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4223 break;
4224 case Op_VecD:
4225 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 break;
4227 case Op_VecX:
4228 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4229 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4230 } else {
4231 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4232 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4233 }
4234 break;
4235 case Op_VecY:
4236 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4237 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4238 } else {
4239 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4240 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4241 }
4242 break;
4243 case Op_VecZ:
4244 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4245 break;
4246 default:
4247 ShouldNotReachHere();
4248 }
4249 } else { // store
4250 switch (ireg) {
4251 case Op_VecS:
4252 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4253 break;
4254 case Op_VecD:
4255 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4256 break;
4257 case Op_VecX:
4258 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4259 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4260 }
4261 else {
4262 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4263 }
4264 break;
4265 case Op_VecY:
4266 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4267 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4268 }
4269 else {
4270 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4271 }
4272 break;
4273 case Op_VecZ:
4274 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4275 break;
4276 default:
4277 ShouldNotReachHere();
4278 }
4279 }
4280 #ifndef PRODUCT
4281 } else {
4282 if (is_load) {
4283 switch (ireg) {
4284 case Op_VecS:
4285 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecD:
4288 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4289 break;
4290 case Op_VecX:
4291 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4292 break;
4293 case Op_VecY:
4294 case Op_VecZ:
4295 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4296 break;
4297 default:
4298 ShouldNotReachHere();
4299 }
4300 } else { // store
4301 switch (ireg) {
4302 case Op_VecS:
4303 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecD:
4306 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4307 break;
4308 case Op_VecX:
4309 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4310 break;
4311 case Op_VecY:
4312 case Op_VecZ:
4313 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4314 break;
4315 default:
4316 ShouldNotReachHere();
4317 }
4318 }
4319 #endif
4320 }
4321 }
4322
4323 template <class T>
4324 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4325 int size = type2aelembytes(bt) * len;
4326 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4327 for (int i = 0; i < len; i++) {
4328 int offset = i * type2aelembytes(bt);
4329 switch (bt) {
4330 case T_BYTE: val->at(i) = con; break;
4331 case T_SHORT: {
4332 jshort c = con;
4333 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4334 break;
4335 }
4336 case T_INT: {
4337 jint c = con;
4338 memcpy(val->adr_at(offset), &c, sizeof(jint));
4339 break;
4340 }
4341 case T_LONG: {
4342 jlong c = con;
4343 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4344 break;
4345 }
4346 case T_FLOAT: {
4347 jfloat c = con;
4348 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4349 break;
4350 }
4351 case T_DOUBLE: {
4352 jdouble c = con;
4353 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4354 break;
4355 }
4356 default: assert(false, "%s", type2name(bt));
4357 }
4358 }
4359 return val;
4360 }
4361
4362 static inline jlong high_bit_set(BasicType bt) {
4363 switch (bt) {
4364 case T_BYTE: return 0x8080808080808080;
4365 case T_SHORT: return 0x8000800080008000;
4366 case T_INT: return 0x8000000080000000;
4367 case T_LONG: return 0x8000000000000000;
4368 default:
4369 ShouldNotReachHere();
4370 return 0;
4371 }
4372 }
4373
4374 #ifndef PRODUCT
4375 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4376 st->print("nop \t# %d bytes pad for loops and calls", _count);
4377 }
4378 #endif
4379
4380 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4381 __ nop(_count);
4382 }
4383
4384 uint MachNopNode::size(PhaseRegAlloc*) const {
4385 return _count;
4386 }
4387
4388 #ifndef PRODUCT
4389 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4390 st->print("# breakpoint");
4391 }
4392 #endif
4393
4394 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4395 __ int3();
4396 }
4397
4398 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4399 return MachNode::size(ra_);
4400 }
4401
4402 %}
4403
4404 //----------ENCODING BLOCK-----------------------------------------------------
4405 // This block specifies the encoding classes used by the compiler to
4406 // output byte streams. Encoding classes are parameterized macros
4407 // used by Machine Instruction Nodes in order to generate the bit
4408 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
4412 // which returns its register number when queried. CONST_INTER causes
4413 // an operand to generate a function which returns the value of the
4414 // constant when queried. MEMORY_INTER causes an operand to generate
4415 // four functions which return the Base Register, the Index Register,
4416 // the Scale Value, and the Offset Value of the operand when queried.
4417 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4419 // associated with each basic boolean condition for a conditional
4420 // instruction.
4421 //
4422 // Instructions specify two basic values for encoding. Again, a
4423 // function is available to check if the constant displacement is an
4424 // oop. They use the ins_encode keyword to specify their encoding
4425 // classes (which must be a sequence of enc_class names, and their
4426 // parameters, specified in the encoding block), and they use the
4427 // opcode keyword to specify, in order, their primary, secondary, and
4428 // tertiary opcode. Only the opcode sections which a particular
4429 // instruction needs for encoding need to be specified.
4430 encode %{
4431 enc_class cdql_enc(no_rax_rdx_RegI div)
4432 %{
4433 // Full implementation of Java idiv and irem; checks for
4434 // special case as described in JVM spec., p.243 & p.271.
4435 //
4436 // normal case special case
4437 //
4438 // input : rax: dividend min_int
4439 // reg: divisor -1
4440 //
4441 // output: rax: quotient (= rax idiv reg) min_int
4442 // rdx: remainder (= rax irem reg) 0
4443 //
4444 // Code sequnce:
4445 //
4446 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4447 // 5: 75 07/08 jne e <normal>
4448 // 7: 33 d2 xor %edx,%edx
4449 // [div >= 8 -> offset + 1]
4450 // [REX_B]
4451 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4452 // c: 74 03/04 je 11 <done>
4453 // 000000000000000e <normal>:
4454 // e: 99 cltd
4455 // [div >= 8 -> offset + 1]
4456 // [REX_B]
4457 // f: f7 f9 idiv $div
4458 // 0000000000000011 <done>:
4459 Label normal;
4460 Label done;
4461
4462 // cmp $0x80000000,%eax
4463 __ cmpl(as_Register(RAX_enc), 0x80000000);
4464
4465 // jne e <normal>
4466 __ jccb(Assembler::notEqual, normal);
4467
4468 // xor %edx,%edx
4469 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4470
4471 // cmp $0xffffffffffffffff,%ecx
4472 __ cmpl($div$$Register, -1);
4473
4474 // je 11 <done>
4475 __ jccb(Assembler::equal, done);
4476
4477 // <normal>
4478 // cltd
4479 __ bind(normal);
4480 __ cdql();
4481
4482 // idivl
4483 // <done>
4484 __ idivl($div$$Register);
4485 __ bind(done);
4486 %}
4487
4488 enc_class cdqq_enc(no_rax_rdx_RegL div)
4489 %{
4490 // Full implementation of Java ldiv and lrem; checks for
4491 // special case as described in JVM spec., p.243 & p.271.
4492 //
4493 // normal case special case
4494 //
4495 // input : rax: dividend min_long
4496 // reg: divisor -1
4497 //
4498 // output: rax: quotient (= rax idiv reg) min_long
4499 // rdx: remainder (= rax irem reg) 0
4500 //
4501 // Code sequnce:
4502 //
4503 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4504 // 7: 00 00 80
4505 // a: 48 39 d0 cmp %rdx,%rax
4506 // d: 75 08 jne 17 <normal>
4507 // f: 33 d2 xor %edx,%edx
4508 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4509 // 15: 74 05 je 1c <done>
4510 // 0000000000000017 <normal>:
4511 // 17: 48 99 cqto
4512 // 19: 48 f7 f9 idiv $div
4513 // 000000000000001c <done>:
4514 Label normal;
4515 Label done;
4516
4517 // mov $0x8000000000000000,%rdx
4518 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4519
4520 // cmp %rdx,%rax
4521 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4522
4523 // jne 17 <normal>
4524 __ jccb(Assembler::notEqual, normal);
4525
4526 // xor %edx,%edx
4527 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4528
4529 // cmp $0xffffffffffffffff,$div
4530 __ cmpq($div$$Register, -1);
4531
4532 // je 1e <done>
4533 __ jccb(Assembler::equal, done);
4534
4535 // <normal>
4536 // cqto
4537 __ bind(normal);
4538 __ cdqq();
4539
4540 // idivq (note: must be emitted by the user of this rule)
4541 // <done>
4542 __ idivq($div$$Register);
4543 __ bind(done);
4544 %}
4545
4546 enc_class clear_avx %{
4547 DEBUG_ONLY(int off0 = __ offset());
4548 if (generate_vzeroupper(Compile::current())) {
4549 // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
4550 // Clear upper bits of YMM registers when current compiled code uses
4551 // wide vectors to avoid AVX <-> SSE transition penalty during call.
4552 __ vzeroupper();
4553 }
4554 DEBUG_ONLY(int off1 = __ offset());
4555 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4556 %}
4557
4558 enc_class Java_To_Runtime(method meth) %{
4559 __ lea(r10, RuntimeAddress((address)$meth$$method));
4560 __ call(r10);
4561 __ post_call_nop();
4562 %}
4563
4564 enc_class Java_Static_Call(method meth)
4565 %{
4566 // JAVA STATIC CALL
4567 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4568 // determine who we intended to call.
4569 if (!_method) {
4570 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4571 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4572 // The NOP here is purely to ensure that eliding a call to
4573 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4574 __ nop(5);
4575 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4576 } else {
4577 int method_index = resolved_method_index(masm);
4578 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4579 : static_call_Relocation::spec(method_index);
4580 address mark = __ pc();
4581 int call_offset = __ offset();
4582 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4583 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4584 // Calls of the same statically bound method can share
4585 // a stub to the interpreter.
4586 __ code()->shared_stub_to_interp_for(_method, call_offset);
4587 } else {
4588 // Emit stubs for static call.
4589 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4590 __ clear_inst_mark();
4591 if (stub == nullptr) {
4592 ciEnv::current()->record_failure("CodeCache is full");
4593 return;
4594 }
4595 }
4596 }
4597 __ post_call_nop();
4598 %}
4599
4600 enc_class Java_Dynamic_Call(method meth) %{
4601 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4602 __ post_call_nop();
4603 %}
4604
4605 enc_class call_epilog %{
4606 if (VerifyStackAtCalls) {
4607 // Check that stack depth is unchanged: find majik cookie on stack
4608 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4609 Label L;
4610 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4611 __ jccb(Assembler::equal, L);
4612 // Die if stack mismatch
4613 __ int3();
4614 __ bind(L);
4615 }
4616 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
4617 // The last return value is not set by the callee but used to pass the null marker to compiled code.
4618 // Search for the corresponding projection, get the register and emit code that initializes it.
4619 uint con = (tf()->range_cc()->cnt() - 1);
4620 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
4621 ProjNode* proj = fast_out(i)->as_Proj();
4622 if (proj->_con == con) {
4623 // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
4624 OptoReg::Name optoReg = ra_->get_reg_first(proj);
4625 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
4626 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
4627 __ testq(rax, rax);
4628 __ setb(Assembler::notZero, toReg);
4629 __ movzbl(toReg, toReg);
4630 if (reg->is_stack()) {
4631 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
4632 __ movq(Address(rsp, st_off), toReg);
4633 }
4634 break;
4635 }
4636 }
4637 if (return_value_is_used()) {
4638 // An inline type is returned as fields in multiple registers.
4639 // Rax either contains an oop if the inline type is buffered or a pointer
4640 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
4641 // if the lowest bit is set to allow C2 to use the oop after null checking.
4642 // rax &= (rax & 1) - 1
4643 __ movptr(rscratch1, rax);
4644 __ andptr(rscratch1, 0x1);
4645 __ subptr(rscratch1, 0x1);
4646 __ andptr(rax, rscratch1);
4647 }
4648 }
4649 %}
4650
4651 %}
4652
4653 //----------FRAME--------------------------------------------------------------
4654 // Definition of frame structure and management information.
4655 //
4656 // S T A C K L A Y O U T Allocators stack-slot number
4657 // | (to get allocators register number
4658 // G Owned by | | v add OptoReg::stack0())
4659 // r CALLER | |
4660 // o | +--------+ pad to even-align allocators stack-slot
4661 // w V | pad0 | numbers; owned by CALLER
4662 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4663 // h ^ | in | 5
4664 // | | args | 4 Holes in incoming args owned by SELF
4665 // | | | | 3
4666 // | | +--------+
4667 // V | | old out| Empty on Intel, window on Sparc
4668 // | old |preserve| Must be even aligned.
4669 // | SP-+--------+----> Matcher::_old_SP, even aligned
4670 // | | in | 3 area for Intel ret address
4671 // Owned by |preserve| Empty on Sparc.
4672 // SELF +--------+
4673 // | | pad2 | 2 pad to align old SP
4674 // | +--------+ 1
4675 // | | locks | 0
4676 // | +--------+----> OptoReg::stack0(), even aligned
4677 // | | pad1 | 11 pad to align new SP
4678 // | +--------+
4679 // | | | 10
4680 // | | spills | 9 spills
4681 // V | | 8 (pad0 slot for callee)
4682 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4683 // ^ | out | 7
4684 // | | args | 6 Holes in outgoing args owned by CALLEE
4685 // Owned by +--------+
4686 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4687 // | new |preserve| Must be even-aligned.
4688 // | SP-+--------+----> Matcher::_new_SP, even aligned
4689 // | | |
4690 //
4691 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4692 // known from SELF's arguments and the Java calling convention.
4693 // Region 6-7 is determined per call site.
4694 // Note 2: If the calling convention leaves holes in the incoming argument
4695 // area, those holes are owned by SELF. Holes in the outgoing area
4696 // are owned by the CALLEE. Holes should not be necessary in the
4697 // incoming area, as the Java calling convention is completely under
4698 // the control of the AD file. Doubles can be sorted and packed to
4699 // avoid holes. Holes in the outgoing arguments may be necessary for
4700 // varargs C calling conventions.
4701 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4702 // even aligned with pad0 as needed.
4703 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4704 // region 6-11 is even aligned; it may be padded out more so that
4705 // the region from SP to FP meets the minimum stack alignment.
4706 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4707 // alignment. Region 11, pad1, may be dynamically extended so that
4708 // SP meets the minimum alignment.
4709
frame
%{
  // Register that is part of the calling convention between compiled code
  // and the interpreter: the inline cache register.
  inline_cache_reg(RAX);

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Frame pointer of compiled code
  frame_pointer(RSP);

  // Stack alignment requirement, in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  // Both tables are indexed by ideal register type; the first two entries
  // are unused placeholders.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the register pair for each ideal register type.
    static const int lo_half[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    // High half; OptoReg::Bad where the value occupies a single slot.
    static const int hi_half[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi_half) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi_half[ideal_reg], lo_half[ideal_reg]);
  %}
%}
4775
4776 //----------ATTRIBUTES---------------------------------------------------------
4777 //----------Operand Attributes-------------------------------------------------
// Required cost attribute; operands default to a cost of 0.
op_attrib op_cost(0);

//----------Instruction Attributes---------------------------------------------
// Required cost attribute; instructions default to a cost of 100.
ins_attrib ins_cost(100);

// Required size attribute (in bits).
ins_attrib ins_size(8);

// Required flag: is this instruction a non-matching short branch variant
// of some long branch?
ins_attrib ins_short_branch(0);

// Required alignment attribute (must be a power of 2). Specifies the
// alignment that some part of the instruction (not necessarily the start)
// requires. If > 1, a compute_padding() function must be provided for the
// instruction.
ins_attrib ins_alignment(1);

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
4797
4798 //----------OPERANDS-----------------------------------------------------------
4799 // Operand definitions must precede instruction definitions for correct parsing
4800 // in the ADLC because operands constitute user defined types which are used in
4801 // instruction definitions.
4802
4803 //----------Simple Operands----------------------------------------------------
4804 // Immediate Operands
// Integer immediate: matches any int constant.
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4814
// Int constant 0 (used for tests against zero).
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4825
// Int constant 1 (used for increment).
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4836
// Int constant -1 (used for decrement).
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4847
// Int constant 2.
operand immI_2() %{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4857
// Int constant 4.
operand immI_4() %{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4867
// Int constant 8.
operand immI_8() %{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4877
// Valid scale values for addressing modes: int constants in [0, 3].
operand immI2() %{
  predicate((n->get_int() >= 0) && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4887
// Unsigned 7-bit int immediate: [0, 0x7F].
operand immU7() %{
  predicate((n->get_int() >= 0) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4897
// Signed 8-bit int immediate: [-0x80, 0x7F].
operand immI8() %{
  predicate((n->get_int() >= -0x80) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4907
// Unsigned 8-bit int immediate: [0, 255].
operand immU8() %{
  predicate((n->get_int() >= 0) && (n->get_int() <= 0xFF));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4917
// Signed 16-bit int immediate: [-32768, 32767].
operand immI16() %{
  predicate((n->get_int() >= -0x8000) && (n->get_int() <= 0x7FFF));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4927
// Non-negative int immediate (fits in 31 unsigned bits).
operand immU31() %{
  predicate(0 <= n->get_int());
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4938
// Pointer immediate: matches any pointer constant.
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4948
// Null pointer immediate.
operand immP0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4959
// Narrow pointer immediate: matches any ConN constant.
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4968
// Narrow klass immediate: matches any ConNKlass constant.
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4976
// Narrow null pointer immediate: a ConN whose narrow constant is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4986
// Pointer immediate that carries no relocation and whose value has no bits
// set at position 31 or above.
operand immP31() %{
  predicate((n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none) &&
            ((n->get_ptr() >> 31) == 0));
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4997
4998
// Long immediate: matches any long constant.
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5008
// Long immediate that fits in a signed 8-bit value: [-0x80, 0x7F].
operand immL8() %{
  predicate((n->get_long() >= -0x80L) && (n->get_long() <= 0x7FL));
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5019
// Long immediate that survives a round trip through an unsigned 32-bit value.
operand immUL32() %{
  predicate((unsigned int) (n->get_long()) == n->get_long());
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
5030
// Long immediate that survives a round trip through a signed 32-bit value.
operand immL32() %{
  predicate((int) (n->get_long()) == n->get_long());
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5041
// Long immediate whose unsigned value is a power of 2.
operand immL_Pow2() %{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5051
// Long immediate whose bitwise complement is a power of 2.
operand immL_NotPow2() %{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5061
// Long constant 0.
operand immL0() %{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
5072
// Long constant 1 (used for increment).
operand immL1() %{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5082
// Long constant -1 (used for decrement).
operand immL_M1() %{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5092
// Long constant 0xFFFFFFFF: the mask that keeps the low 32 bits.
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5103
// Positive int immediate of the form 2^n - 1. The + 1 is done on the
// unsigned value.
operand immI_Pow2M1() %{
  predicate((n->get_int() > 0) && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5115
// Float immediate zero: the jint_cast of the constant must be 0.
operand immF0() %{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5126
5127 // Float Immediate: any ConF (no predicate); costlier than immF0 (op_cost 15 vs 5)
5128 operand immF()
5129 %{
5130 match(ConF);
5131
5132 op_cost(15);
5133 format %{ %}
5134 interface(CONST_INTER);
5135 %}
5136
5137 // Half Float Immediate: any ConH (no predicate)
5138 operand immH()
5139 %{
5140 match(ConH);
5141
5142 op_cost(15);
5143 format %{ %}
5144 interface(CONST_INTER);
5145 %}
5146
5147 // Double Immediate zero: ConD for which jlong_cast of the value is 0
5148 operand immD0()
5149 %{
5150 predicate(jlong_cast(n->getd()) == 0); // NOTE(review): presumably a bit-pattern test, so -0.0 would not match - confirm
5151 match(ConD);
5152
5153 op_cost(5);
5154 format %{ %}
5155 interface(CONST_INTER);
5156 %}
5157
5158 // Double Immediate: any ConD (no predicate); costlier than immD0 (op_cost 15 vs 5)
5159 operand immD()
5160 %{
5161 match(ConD);
5162
5163 op_cost(15);
5164 format %{ %}
5165 interface(CONST_INTER);
5166 %}
5167
5168 // Immediates for special shifts (sign extend)
5169
5170 // Int constant 16: matches only the ConI constant 16
5171 operand immI_16()
5172 %{
5173 predicate(n->get_int() == 16);
5174 match(ConI);
5175
5176 format %{ %}
5177 interface(CONST_INTER);
5178 %}
5179
5180 operand immI_24()
5181 %{
5182 predicate(n->get_int() == 24); // matches only the ConI constant 24
5183 match(ConI);
5184
5185 format %{ %}
5186 interface(CONST_INTER);
5187 %}
5188
5189 // Constant for byte-wide masking: matches only the ConI constant 255 (0xFF)
5190 operand immI_255()
5191 %{
5192 predicate(n->get_int() == 255);
5193 match(ConI);
5194
5195 format %{ %}
5196 interface(CONST_INTER);
5197 %}
5198
5199 // Constant for short-wide masking: matches only the ConI constant 65535 (0xFFFF)
5200 operand immI_65535()
5201 %{
5202 predicate(n->get_int() == 65535);
5203 match(ConI);
5204
5205 format %{ %}
5206 interface(CONST_INTER);
5207 %}
5208
5209 // Constant for byte-wide masking (long form): matches only the ConL constant 255 (0xFF)
5210 operand immL_255()
5211 %{
5212 predicate(n->get_long() == 255);
5213 match(ConL);
5214
5215 format %{ %}
5216 interface(CONST_INTER);
5217 %}
5218
5219 // Constant for short-wide masking (long form): matches only the ConL constant 65535 (0xFFFF)
5220 operand immL_65535()
5221 %{
5222 predicate(n->get_long() == 65535);
5223 match(ConL);
5224
5225 format %{ %}
5226 interface(CONST_INTER);
5227 %}
5228
5229 // AOT Runtime Constants Address: ConP accepted by AOTRuntimeConstants::contains
5230 operand immAOTRuntimeConstantsAddress()
5231 %{
5232 // Check if the address is in the range of AOT Runtime Constants
5233 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5234 match(ConP);
5235
5236 op_cost(0); // zero cost, the lowest op_cost among the constant operands in this section
5237 format %{ %}
5238 interface(CONST_INTER);
5239 %}
5240
5241 operand kReg()
5242 %{
5243 constraint(ALLOC_IN_RC(vectmask_reg)); // vector-mask register operand: matches RegVectMask, allocated in vectmask_reg
5244 match(RegVectMask);
5245 format %{%}
5246 interface(REG_INTER);
5247 %}
5248
5249 // Register Operands
5250 // Integer Register: allocated in int_reg; matches RegI and the fixed-register int operands listed below
5251 operand rRegI()
5252 %{
5253 constraint(ALLOC_IN_RC(int_reg));
5254 match(RegI);
5255
5256 match(rax_RegI);
5257 match(rbx_RegI);
5258 match(rcx_RegI);
5259 match(rdx_RegI);
5260 match(rdi_RegI);
5261
5262 format %{ %}
5263 interface(REG_INTER);
5264 %}
5265
5266 // Special Registers
5267 operand rax_RegI()
5268 %{
5269 constraint(ALLOC_IN_RC(int_rax_reg)); // int operand restricted to the int_rax_reg class (printed as "RAX")
5270 match(RegI);
5271 match(rRegI);
5272
5273 format %{ "RAX" %}
5274 interface(REG_INTER);
5275 %}
5276
5277 // Special Registers
5278 operand rbx_RegI()
5279 %{
5280 constraint(ALLOC_IN_RC(int_rbx_reg)); // int operand restricted to the int_rbx_reg class (printed as "RBX")
5281 match(RegI);
5282 match(rRegI);
5283
5284 format %{ "RBX" %}
5285 interface(REG_INTER);
5286 %}
5287
5288 operand rcx_RegI()
5289 %{
5290 constraint(ALLOC_IN_RC(int_rcx_reg)); // int operand restricted to the int_rcx_reg class (printed as "RCX")
5291 match(RegI);
5292 match(rRegI);
5293
5294 format %{ "RCX" %}
5295 interface(REG_INTER);
5296 %}
5297
5298 operand rdx_RegI()
5299 %{
5300 constraint(ALLOC_IN_RC(int_rdx_reg)); // int operand restricted to the int_rdx_reg class (printed as "RDX")
5301 match(RegI);
5302 match(rRegI);
5303
5304 format %{ "RDX" %}
5305 interface(REG_INTER);
5306 %}
5307
5308 operand rdi_RegI()
5309 %{
5310 constraint(ALLOC_IN_RC(int_rdi_reg)); // int operand restricted to the int_rdi_reg class (printed as "RDI")
5311 match(RegI);
5312 match(rRegI);
5313
5314 format %{ "RDI" %}
5315 interface(REG_INTER);
5316 %}
5317
5318 operand no_rax_rdx_RegI()
5319 %{
5320 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg)); // int operand in the int_no_rax_rdx_reg class
5321 match(RegI);
5322 match(rbx_RegI); // only rbx/rcx/rdi fixed operands are listed; rax_RegI and rdx_RegI are not
5323 match(rcx_RegI);
5324 match(rdi_RegI);
5325
5326 format %{ %}
5327 interface(REG_INTER);
5328 %}
5329
5330 operand no_rbp_r13_RegI()
5331 %{
5332 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg)); // int operand in the int_no_rbp_r13_reg class
5333 match(RegI);
5334 match(rRegI); // also accepts rRegI and each fixed-register int operand defined in this file section
5335 match(rax_RegI);
5336 match(rbx_RegI);
5337 match(rcx_RegI);
5338 match(rdx_RegI);
5339 match(rdi_RegI);
5340
5341 format %{ %}
5342 interface(REG_INTER);
5343 %}
5344
5345 // Pointer Register: allocated in any_reg; matches RegP, rRegP and the fixed pointer-register operands
5346 operand any_RegP()
5347 %{
5348 constraint(ALLOC_IN_RC(any_reg));
5349 match(RegP);
5350 match(rax_RegP);
5351 match(rbx_RegP);
5352 match(rdi_RegP);
5353 match(rsi_RegP);
5354 match(rbp_RegP);
5355 match(r15_RegP);
5356 match(rRegP);
5357
5358 format %{ %}
5359 interface(REG_INTER);
5360 %}
5361
5362 operand rRegP()
5363 %{
5364 constraint(ALLOC_IN_RC(ptr_reg)); // general pointer register operand, allocated in ptr_reg
5365 match(RegP);
5366 match(rax_RegP);
5367 match(rbx_RegP);
5368 match(rdi_RegP);
5369 match(rsi_RegP);
5370 match(rbp_RegP); // See Q&A below about
5371 match(r15_RegP); // r15_RegP and rbp_RegP.
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 operand rRegN() %{
5378 constraint(ALLOC_IN_RC(int_reg)); // RegN operand; shares the int_reg class used by rRegI
5379 match(RegN);
5380
5381 format %{ %}
5382 interface(REG_INTER);
5383 %}
5384
5385 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5386 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5387 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5388 // The output of an instruction is controlled by the allocator, which respects
5389 // register class masks, not match rules. Unless an instruction mentions
5390 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5391 // by the allocator as an input.
5392 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5393 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5394 // result, RBP is not included in the output of the instruction either.
5395
5396 // This operand is not allowed to use RBP even if
5397 // RBP is not used to hold the frame pointer.
5398 operand no_rbp_RegP()
5399 %{
5400 constraint(ALLOC_IN_RC(ptr_reg_no_rbp)); // pointer operand allocated in ptr_reg_no_rbp
5401 match(RegP);
5402 match(rbx_RegP); // rbp_RegP is not among the fixed operands accepted here
5403 match(rsi_RegP);
5404 match(rdi_RegP);
5405
5406 format %{ %}
5407 interface(REG_INTER);
5408 %}
5409
5410 // Special Registers
5411 // Return a pointer value
5412 operand rax_RegP()
5413 %{
5414 constraint(ALLOC_IN_RC(ptr_rax_reg)); // pointer operand restricted to the ptr_rax_reg class
5415 match(RegP);
5416 match(rRegP);
5417
5418 format %{ %}
5419 interface(REG_INTER);
5420 %}
5421
5422 // Special Registers
5423 // Return a compressed pointer value
5424 operand rax_RegN()
5425 %{
5426 constraint(ALLOC_IN_RC(int_rax_reg)); // RegN operand restricted to int_rax_reg, the class rax_RegI uses
5427 match(RegN);
5428 match(rRegN);
5429
5430 format %{ %}
5431 interface(REG_INTER);
5432 %}
5433
5434 // Used in AtomicAdd
5435 operand rbx_RegP()
5436 %{
5437 constraint(ALLOC_IN_RC(ptr_rbx_reg)); // pointer operand restricted to the ptr_rbx_reg class
5438 match(RegP);
5439 match(rRegP);
5440
5441 format %{ %}
5442 interface(REG_INTER);
5443 %}
5444
5445 operand rsi_RegP()
5446 %{
5447 constraint(ALLOC_IN_RC(ptr_rsi_reg)); // pointer operand restricted to the ptr_rsi_reg class
5448 match(RegP);
5449 match(rRegP);
5450
5451 format %{ %}
5452 interface(REG_INTER);
5453 %}
5454
5455 operand rbp_RegP()
5456 %{
5457 constraint(ALLOC_IN_RC(ptr_rbp_reg)); // pointer operand restricted to the ptr_rbp_reg class
5458 match(RegP);
5459 match(rRegP);
5460
5461 format %{ %}
5462 interface(REG_INTER);
5463 %}
5464
5465 // Used in rep stosq
5466 operand rdi_RegP()
5467 %{
5468 constraint(ALLOC_IN_RC(ptr_rdi_reg)); // pointer operand restricted to the ptr_rdi_reg class
5469 match(RegP);
5470 match(rRegP);
5471
5472 format %{ %}
5473 interface(REG_INTER);
5474 %}
5475
5476 operand r15_RegP()
5477 %{
5478 constraint(ALLOC_IN_RC(ptr_r15_reg)); // restricted to ptr_r15_reg; the Q&A above calls r15 the read-only TLS register
5479 match(RegP);
5480 match(rRegP);
5481
5482 format %{ %}
5483 interface(REG_INTER);
5484 %}
5485
5486 operand rRegL()
5487 %{
5488 constraint(ALLOC_IN_RC(long_reg)); // general long register operand, allocated in long_reg
5489 match(RegL);
5490 match(rax_RegL); // of the fixed long operands, only rax_RegL and rdx_RegL are listed as matches
5491 match(rdx_RegL);
5492
5493 format %{ %}
5494 interface(REG_INTER);
5495 %}
5496
5497 // Special Registers
5498 operand no_rax_rdx_RegL()
5499 %{
5500 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg)); // long operand allocated in long_no_rax_rdx_reg
5501 match(RegL);
5502 match(rRegL);
5503
5504 format %{ %}
5505 interface(REG_INTER);
5506 %}
5507
5508 operand rax_RegL()
5509 %{
5510 constraint(ALLOC_IN_RC(long_rax_reg)); // long operand restricted to the long_rax_reg class (printed as "RAX")
5511 match(RegL);
5512 match(rRegL);
5513
5514 format %{ "RAX" %}
5515 interface(REG_INTER);
5516 %}
5517
5518 operand rcx_RegL()
5519 %{
5520 constraint(ALLOC_IN_RC(long_rcx_reg)); // long operand restricted to the long_rcx_reg class
5521 match(RegL);
5522 match(rRegL);
5523
5524 format %{ %}
5525 interface(REG_INTER);
5526 %}
5527
5528 operand rdx_RegL()
5529 %{
5530 constraint(ALLOC_IN_RC(long_rdx_reg)); // long operand restricted to the long_rdx_reg class
5531 match(RegL);
5532 match(rRegL);
5533
5534 format %{ %}
5535 interface(REG_INTER);
5536 %}
5537
5538 operand r11_RegL()
5539 %{
5540 constraint(ALLOC_IN_RC(long_r11_reg)); // long operand restricted to the long_r11_reg class
5541 match(RegL);
5542 match(rRegL);
5543
5544 format %{ %}
5545 interface(REG_INTER);
5546 %}
5547
5548 operand no_rbp_r13_RegL()
5549 %{
5550 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg)); // long operand allocated in long_no_rbp_r13_reg
5551 match(RegL);
5552 match(rRegL); // also accepts rRegL and the rax/rcx/rdx fixed long operands
5553 match(rax_RegL);
5554 match(rcx_RegL);
5555 match(rdx_RegL);
5556
5557 format %{ %}
5558 interface(REG_INTER);
5559 %}
5560
5561 // Flags register, used as output of compare instructions
5562 operand rFlagsReg()
5563 %{
5564 constraint(ALLOC_IN_RC(int_flags)); // all rFlagsReg* operands share the int_flags class and match RegFlags
5565 match(RegFlags);
5566
5567 format %{ "RFLAGS" %}
5568 interface(REG_INTER);
5569 %}
5570
5571 // Flags register, used as output of FLOATING POINT compare instructions
5572 operand rFlagsRegU()
5573 %{
5574 constraint(ALLOC_IN_RC(int_flags)); // same class and match rule as rFlagsReg; a distinct operand type (printed "RFLAGS_U")
5575 match(RegFlags);
5576
5577 format %{ "RFLAGS_U" %}
5578 interface(REG_INTER);
5579 %}
5580
5581 operand rFlagsRegUCF() %{
5582 constraint(ALLOC_IN_RC(int_flags));
5583 match(RegFlags);
5584 predicate(!UseAPX || !VM_Version::supports_avx10_2()); // enabled unless both UseAPX and supports_avx10_2() hold
5585
5586 format %{ "RFLAGS_U_CF" %}
5587 interface(REG_INTER);
5588 %}
5589
5590 operand rFlagsRegUCFE() %{
5591 constraint(ALLOC_IN_RC(int_flags));
5592 match(RegFlags);
5593 predicate(UseAPX && VM_Version::supports_avx10_2()); // enabled only when both hold; exact complement of the rFlagsRegUCF predicate
5594
5595 format %{ "RFLAGS_U_CFE" %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Float register operands: RegF allocated in the float_reg class
5600 operand regF() %{
5601 constraint(ALLOC_IN_RC(float_reg));
5602 match(RegF);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 // Float register operands: RegF allocated in the float_reg_legacy class
5609 operand legRegF() %{
5610 constraint(ALLOC_IN_RC(float_reg_legacy));
5611 match(RegF);
5612
5613 format %{ %}
5614 interface(REG_INTER);
5615 %}
5616
5617 // Float register operands: RegF allocated in the float_reg_vl class
5618 operand vlRegF() %{
5619 constraint(ALLOC_IN_RC(float_reg_vl));
5620 match(RegF);
5621
5622 format %{ %}
5623 interface(REG_INTER);
5624 %}
5625
5626 // Double register operands: RegD allocated in the double_reg class
5627 operand regD() %{
5628 constraint(ALLOC_IN_RC(double_reg));
5629 match(RegD);
5630
5631 format %{ %}
5632 interface(REG_INTER);
5633 %}
5634
5635 // Double register operands: RegD allocated in the double_reg_legacy class
5636 operand legRegD() %{
5637 constraint(ALLOC_IN_RC(double_reg_legacy));
5638 match(RegD);
5639
5640 format %{ %}
5641 interface(REG_INTER);
5642 %}
5643
5644 // Double register operands: RegD allocated in the double_reg_vl class
5645 operand vlRegD() %{
5646 constraint(ALLOC_IN_RC(double_reg_vl));
5647 match(RegD);
5648
5649 format %{ %}
5650 interface(REG_INTER);
5651 %}
5652
5653 //----------Memory Operands----------------------------------------------------
5654 // Direct Memory Operand
5655 // operand direct(immP addr)
5656 // %{
5657 // match(addr);
5658
5659 // format %{ "[$addr]" %}
5660 // interface(MEMORY_INTER) %{
5661 // base(0xFFFFFFFF);
5662 // index(0x4);
5663 // scale(0x0);
5664 // disp($addr);
5665 // %}
5666 // %}
5667
5668 // Indirect Memory Operand: address is the pointer register itself, [reg]
5669 operand indirect(any_RegP reg)
5670 %{
5671 constraint(ALLOC_IN_RC(ptr_reg));
5672 match(reg);
5673
5674 format %{ "[$reg]" %}
5675 interface(MEMORY_INTER) %{
5676 base($reg);
5677 index(0x4); // No Index
5678 scale(0x0); // No Scale
5679 disp(0x0); // No Displacement
5680 %}
5681 %}
5682
5683 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8 constant
5684 operand indOffset8(any_RegP reg, immL8 off)
5685 %{
5686 constraint(ALLOC_IN_RC(ptr_reg));
5687 match(AddP reg off);
5688
5689 format %{ "[$reg + $off (8-bit)]" %}
5690 interface(MEMORY_INTER) %{
5691 base($reg);
5692 index(0x4); // No Index
5693 scale(0x0); // No Scale
5694 disp($off);
5695 %}
5696 %}
5697
5698 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32 constant
5699 operand indOffset32(any_RegP reg, immL32 off)
5700 %{
5701 constraint(ALLOC_IN_RC(ptr_reg));
5702 match(AddP reg off);
5703
5704 format %{ "[$reg + $off (32-bit)]" %}
5705 interface(MEMORY_INTER) %{
5706 base($reg);
5707 index(0x4); // No Index
5708 scale(0x0); // No Scale
5709 disp($off);
5710 %}
5711 %}
5712
5713 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off], unscaled long index
5714 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5715 %{
5716 constraint(ALLOC_IN_RC(ptr_reg));
5717 match(AddP (AddP reg lreg) off);
5718
5719 op_cost(10);
5720 format %{"[$reg + $off + $lreg]" %}
5721 interface(MEMORY_INTER) %{
5722 base($reg);
5723 index($lreg);
5724 scale(0x0); // No Scale
5725 disp($off);
5726 %}
5727 %}
5728
5729 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset and no scale
5730 operand indIndex(any_RegP reg, rRegL lreg)
5731 %{
5732 constraint(ALLOC_IN_RC(ptr_reg));
5733 match(AddP reg lreg);
5734
5735 op_cost(10);
5736 format %{"[$reg + $lreg]" %}
5737 interface(MEMORY_INTER) %{
5738 base($reg);
5739 index($lreg);
5740 scale(0x0); // No Scale
5741 disp(0x0); // No Displacement
5742 %}
5743 %}
5744
5745 // Indirect Memory Times Scale Plus Index Register: [reg + (lreg << scale)], scale is an immI2 constant
5746 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5747 %{
5748 constraint(ALLOC_IN_RC(ptr_reg));
5749 match(AddP reg (LShiftL lreg scale));
5750
5751 op_cost(10);
5752 format %{"[$reg + $lreg << $scale]" %}
5753 interface(MEMORY_INTER) %{
5754 base($reg);
5755 index($lreg);
5756 scale($scale);
5757 disp(0x0); // No Displacement
5758 %}
5759 %}
5760
5761 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5762 %{
5763 constraint(ALLOC_IN_RC(ptr_reg));
5764 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // long type reached via in(3)->in(1) (presumably the ConvI2L) must have a lower bound >= 0
5765 match(AddP reg (LShiftL (ConvI2L idx) scale));
5766
5767 op_cost(10);
5768 format %{"[$reg + pos $idx << $scale]" %}
5769 interface(MEMORY_INTER) %{
5770 base($reg);
5771 index($idx); // the int register is used directly as the index; the ConvI2L is folded into the operand
5772 scale($scale);
5773 disp(0x0); // No Displacement
5774 %}
5775 %}
5776
5777 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (lreg << scale) + off]
5778 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5779 %{
5780 constraint(ALLOC_IN_RC(ptr_reg));
5781 match(AddP (AddP reg (LShiftL lreg scale)) off);
5782
5783 op_cost(10);
5784 format %{"[$reg + $off + $lreg << $scale]" %}
5785 interface(MEMORY_INTER) %{
5786 base($reg);
5787 index($lreg);
5788 scale($scale);
5789 disp($off);
5790 %}
5791 %}
5792
5793 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [reg + idx + off], int index
5794 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5795 %{
5796 constraint(ALLOC_IN_RC(ptr_reg));
5797 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); // long type reached via in(2)->in(3) (presumably the ConvI2L) must have a lower bound >= 0
5798 match(AddP (AddP reg (ConvI2L idx)) off);
5799
5800 op_cost(10);
5801 format %{"[$reg + $off + $idx]" %}
5802 interface(MEMORY_INTER) %{
5803 base($reg);
5804 index($idx); // the int register is used directly as the index
5805 scale(0x0); // No Scale
5806 disp($off);
5807 %}
5808 %}
5809
5810 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: [reg + (idx << scale) + off]
5811 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5812 %{
5813 constraint(ALLOC_IN_RC(ptr_reg));
5814 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // long type reached via in(2)->in(3)->in(1) (presumably the ConvI2L) must have a lower bound >= 0
5815 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5816
5817 op_cost(10);
5818 format %{"[$reg + $off + $idx << $scale]" %}
5819 interface(MEMORY_INTER) %{
5820 base($reg);
5821 index($idx); // the int register is used directly as the index
5822 scale($scale);
5823 disp($off);
5824 %}
5825 %}
5826
5827 // Indirect Narrow Oop Operand: folds a DecodeN into the address as [R12 + reg << 3]
5828 operand indCompressedOop(rRegN reg) %{
5829 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8)); // only when the oop shift equals the times_8 scale
5830 constraint(ALLOC_IN_RC(ptr_reg));
5831 match(DecodeN reg);
5832
5833 op_cost(10);
5834 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
5835 interface(MEMORY_INTER) %{
5836 base(0xc); // R12
5837 index($reg); // the narrow register is the scaled index
5838 scale(0x3);
5839 disp(0x0); // No Displacement
5840 %}
5841 %}
5842
5843 // Indirect Narrow Oop Plus Offset Operand: [R12 + reg << 3 + off]
5844 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5845 // we can't free r12 even with CompressedOops::base() == nullptr.
5846 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5847 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8)); // only when the oop shift equals the times_8 scale
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 match(AddP (DecodeN reg) off);
5850
5851 op_cost(10);
5852 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5853 interface(MEMORY_INTER) %{
5854 base(0xc); // R12
5855 index($reg); // the narrow register is the scaled index
5856 scale(0x3);
5857 disp($off);
5858 %}
5859 %}
5860
5861 // Indirect Memory Operand (narrow): [reg] where reg holds a narrow oop and the DecodeN is folded away
5862 operand indirectNarrow(rRegN reg)
5863 %{
5864 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5865 constraint(ALLOC_IN_RC(ptr_reg));
5866 match(DecodeN reg);
5867
5868 format %{ "[$reg]" %}
5869 interface(MEMORY_INTER) %{
5870 base($reg); // the narrow register is used directly as the base
5871 index(0x4); // No Index
5872 scale(0x0); // No Scale
5873 disp(0x0); // No Displacement
5874 %}
5875 %}
5876
5877 // Indirect Memory Plus Short Offset Operand (narrow): [reg + off], off is an immL8 constant
5878 operand indOffset8Narrow(rRegN reg, immL8 off)
5879 %{
5880 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5881 constraint(ALLOC_IN_RC(ptr_reg));
5882 match(AddP (DecodeN reg) off);
5883
5884 format %{ "[$reg + $off (8-bit)]" %}
5885 interface(MEMORY_INTER) %{
5886 base($reg); // the narrow register is used directly as the base
5887 index(0x4); // No Index
5888 scale(0x0); // No Scale
5889 disp($off);
5890 %}
5891 %}
5892
5893 // Indirect Memory Plus Long Offset Operand (narrow): [reg + off], off is an immL32 constant
5894 operand indOffset32Narrow(rRegN reg, immL32 off)
5895 %{
5896 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5897 constraint(ALLOC_IN_RC(ptr_reg));
5898 match(AddP (DecodeN reg) off);
5899
5900 format %{ "[$reg + $off (32-bit)]" %}
5901 interface(MEMORY_INTER) %{
5902 base($reg); // the narrow register is used directly as the base
5903 index(0x4); // No Index
5904 scale(0x0); // No Scale
5905 disp($off);
5906 %}
5907 %}
5908
5909 // Indirect Memory Plus Index Register Plus Offset Operand (narrow): [reg + lreg + off]
5910 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5911 %{
5912 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5913 constraint(ALLOC_IN_RC(ptr_reg));
5914 match(AddP (AddP (DecodeN reg) lreg) off);
5915
5916 op_cost(10);
5917 format %{"[$reg + $off + $lreg]" %}
5918 interface(MEMORY_INTER) %{
5919 base($reg); // the narrow register is used directly as the base
5920 index($lreg);
5921 scale(0x0); // No Scale
5922 disp($off);
5923 %}
5924 %}
5925
5926 // Indirect Memory Plus Index Register Operand (narrow): [reg + lreg], no offset and no scale
5927 operand indIndexNarrow(rRegN reg, rRegL lreg)
5928 %{
5929 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5930 constraint(ALLOC_IN_RC(ptr_reg));
5931 match(AddP (DecodeN reg) lreg);
5932
5933 op_cost(10);
5934 format %{"[$reg + $lreg]" %}
5935 interface(MEMORY_INTER) %{
5936 base($reg); // the narrow register is used directly as the base
5937 index($lreg);
5938 scale(0x0); // No Scale
5939 disp(0x0); // No Displacement
5940 %}
5941 %}
5942
5943 // Indirect Memory Times Scale Plus Index Register (narrow): [reg + (lreg << scale)]
5944 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5945 %{
5946 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5947 constraint(ALLOC_IN_RC(ptr_reg));
5948 match(AddP (DecodeN reg) (LShiftL lreg scale));
5949
5950 op_cost(10);
5951 format %{"[$reg + $lreg << $scale]" %}
5952 interface(MEMORY_INTER) %{
5953 base($reg); // the narrow register is used directly as the base
5954 index($lreg);
5955 scale($scale);
5956 disp(0x0); // No Displacement
5957 %}
5958 %}
5959
5960 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow): [reg + (lreg << scale) + off]
5961 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5962 %{
5963 predicate(CompressedOops::shift() == 0); // usable only when the compressed-oop shift is 0
5964 constraint(ALLOC_IN_RC(ptr_reg));
5965 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5966
5967 op_cost(10);
5968 format %{"[$reg + $off + $lreg << $scale]" %}
5969 interface(MEMORY_INTER) %{
5970 base($reg); // the narrow register is used directly as the base
5971 index($lreg);
5972 scale($scale);
5973 disp($off);
5974 %}
5975 %}
5976
5977 // Indirect Memory Plus Positive Index Register Plus Offset Operand (narrow): [reg + idx + off], int index
5978 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5979 %{
5980 constraint(ALLOC_IN_RC(ptr_reg));
5981 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); // shift 0, and the long type at in(2)->in(3) (presumably the ConvI2L) has a lower bound >= 0
5982 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5983
5984 op_cost(10);
5985 format %{"[$reg + $off + $idx]" %}
5986 interface(MEMORY_INTER) %{
5987 base($reg); // the narrow register is used directly as the base
5988 index($idx); // the int register is used directly as the index
5989 scale(0x0); // No Scale
5990 disp($off);
5991 %}
5992 %}
5993
5994 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow): [reg + (idx << scale) + off]
5995 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5996 %{
5997 constraint(ALLOC_IN_RC(ptr_reg));
5998 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // shift 0, and the long type at in(2)->in(3)->in(1) (presumably the ConvI2L) has a lower bound >= 0
5999 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
6000
6001 op_cost(10);
6002 format %{"[$reg + $off + $idx << $scale]" %}
6003 interface(MEMORY_INTER) %{
6004 base($reg); // the narrow register is used directly as the base
6005 index($idx); // the int register is used directly as the index
6006 scale($scale);
6007 disp($off);
6008 %}
6009 %}
6010
6011 //----------Special Memory Operands--------------------------------------------
6012 // Stack Slot Operand - This operand is used for loading and storing temporary
6013 // values on the stack where a match requires a value to
6014 // flow through memory. This variant takes a pointer stack slot (sRegP).
6015 operand stackSlotP(sRegP reg)
6016 %{
6017 constraint(ALLOC_IN_RC(stack_slots));
6018 // No match rule because this operand is only generated in matching
6019
6020 format %{ "[$reg]" %}
6021 interface(MEMORY_INTER) %{
6022 base(0x4); // RSP
6023 index(0x4); // No Index
6024 scale(0x0); // No Scale
6025 disp($reg); // Stack Offset
6026 %}
6027 %}
6028
6029 operand stackSlotI(sRegI reg)
6030 %{
6031 constraint(ALLOC_IN_RC(stack_slots)); // int stack slot (sRegI), addressed as an RSP-relative memory operand
6032 // No match rule because this operand is only generated in matching
6033
6034 format %{ "[$reg]" %}
6035 interface(MEMORY_INTER) %{
6036 base(0x4); // RSP
6037 index(0x4); // No Index
6038 scale(0x0); // No Scale
6039 disp($reg); // Stack Offset
6040 %}
6041 %}
6042
6043 operand stackSlotF(sRegF reg)
6044 %{
6045 constraint(ALLOC_IN_RC(stack_slots)); // float stack slot (sRegF), addressed as an RSP-relative memory operand
6046 // No match rule because this operand is only generated in matching
6047
6048 format %{ "[$reg]" %}
6049 interface(MEMORY_INTER) %{
6050 base(0x4); // RSP
6051 index(0x4); // No Index
6052 scale(0x0); // No Scale
6053 disp($reg); // Stack Offset
6054 %}
6055 %}
6056
6057 operand stackSlotD(sRegD reg)
6058 %{
6059 constraint(ALLOC_IN_RC(stack_slots)); // double stack slot (sRegD), addressed as an RSP-relative memory operand
6060 // No match rule because this operand is only generated in matching
6061
6062 format %{ "[$reg]" %}
6063 interface(MEMORY_INTER) %{
6064 base(0x4); // RSP
6065 index(0x4); // No Index
6066 scale(0x0); // No Scale
6067 disp($reg); // Stack Offset
6068 %}
6069 %}
6070 operand stackSlotL(sRegL reg)
6071 %{
6072 constraint(ALLOC_IN_RC(stack_slots)); // long stack slot (sRegL), addressed as an RSP-relative memory operand
6073 // No match rule because this operand is only generated in matching
6074
6075 format %{ "[$reg]" %}
6076 interface(MEMORY_INTER) %{
6077 base(0x4); // RSP
6078 index(0x4); // No Index
6079 scale(0x0); // No Scale
6080 disp($reg); // Stack Offset
6081 %}
6082 %}
6083
6084 //----------Conditional Branch Operands----------------------------------------
6085 // Comparison Op - This is the operation of the comparison, and is limited to
6086 // the following set of codes:
6087 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6088 //
6089 // Other attributes of the comparison, such as unsignedness, are specified
6090 // by the comparison instruction that sets a condition code flags register.
6091 // That result is represented by a flags operand whose subtype is appropriate
6092 // to the unsignedness (etc.) of the comparison.
6093 //
6094 // Later, the instruction which matches both the Comparison Op (a Bool) and
6095 // the flags (produced by the Cmp) specifies the coding of the comparison op
6096 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6097
6098 // Comparison Code: maps each Bool test to a signed condition (l/ge/le/g) with its encoding and mnemonic suffix
6099 operand cmpOp()
6100 %{
6101 match(Bool);
6102
6103 format %{ "" %}
6104 interface(COND_INTER) %{
6105 equal(0x4, "e");
6106 not_equal(0x5, "ne");
6107 less(0xc, "l");
6108 greater_equal(0xd, "ge");
6109 less_equal(0xe, "le");
6110 greater(0xf, "g");
6111 overflow(0x0, "o");
6112 no_overflow(0x1, "no");
6113 %}
6114 %}
6115
6116 // Comparison Code, unsigned compare. Used by FP also, with
6117 // C2 (unordered) turned into GT or LT already. The other bits
6118 // C0 and C3 are turned into Carry & Zero flags.
6119 operand cmpOpU()
6120 %{
6121 match(Bool);
6122
6123 format %{ "" %}
6124 interface(COND_INTER) %{
6125 equal(0x4, "e");
6126 not_equal(0x5, "ne");
6127 less(0x2, "b"); // ordering tests use the unsigned conditions b/ae/be/a
6128 greater_equal(0x3, "ae");
6129 less_equal(0x6, "be");
6130 greater(0x7, "a");
6131 overflow(0x0, "o");
6132 no_overflow(0x1, "no");
6133 %}
6134 %}
6135
6136
6137 // Floating comparisons that don't require any fixup for the unordered case,
6138 // If both inputs of the comparison are the same, ZF is always set so we
6139 // don't need to use cmpOpUCF2 for eq/ne
6140 operand cmpOpUCF() %{
6141 match(Bool);
6142 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6143 (n->as_Bool()->_test._test == BoolTest::lt ||
6144 n->as_Bool()->_test._test == BoolTest::ge ||
6145 n->as_Bool()->_test._test == BoolTest::le ||
6146 n->as_Bool()->_test._test == BoolTest::gt ||
6147 n->in(1)->in(1) == n->in(1)->in(2))); // lt/ge/le/gt tests, or any test whose compare has identical inputs
6148 format %{ "" %}
6149 interface(COND_INTER) %{
6150 equal(0xb, "np"); // eq/ne are encoded as parity tests (np/p) in this operand
6151 not_equal(0xa, "p");
6152 less(0x2, "b");
6153 greater_equal(0x3, "ae");
6154 less_equal(0x6, "be");
6155 greater(0x7, "a");
6156 overflow(0x0, "o");
6157 no_overflow(0x1, "no");
6158 %}
6159 %}
6160
6161
6162 // Floating comparisons that can be fixed up with extra conditional jumps
6163 operand cmpOpUCF2() %{
6164 match(Bool);
6165 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6166 (n->as_Bool()->_test._test == BoolTest::ne ||
6167 n->as_Bool()->_test._test == BoolTest::eq) &&
6168 n->in(1)->in(1) != n->in(1)->in(2)); // only eq/ne tests whose compare inputs are distinct nodes
6169 format %{ "" %}
6170 interface(COND_INTER) %{
6171 equal(0x4, "e");
6172 not_equal(0x5, "ne");
6173 less(0x2, "b");
6174 greater_equal(0x3, "ae");
6175 less_equal(0x6, "be");
6176 greater(0x7, "a");
6177 overflow(0x0, "o");
6178 no_overflow(0x1, "no");
6179 %}
6180 %}
6181
6182
6183 // Floating point comparisons that set condition flags to test more directly,
6184 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6185 // are used for L (<) and LE (<=) conditions. It's important to convert these
6186 // latter conditions to ones that use unsigned tests before passing into an
6187 // instruction because the preceding comparison might be based on a three way
6188 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6189 operand cmpOpUCFE()
6190 %{
6191 match(Bool);
6192 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6193 (n->as_Bool()->_test._test == BoolTest::ne ||
6194 n->as_Bool()->_test._test == BoolTest::eq ||
6195 n->as_Bool()->_test._test == BoolTest::lt ||
6196 n->as_Bool()->_test._test == BoolTest::ge ||
6197 n->as_Bool()->_test._test == BoolTest::le ||
6198 n->as_Bool()->_test._test == BoolTest::gt)); // all six eq/ne/lt/ge/le/gt tests, only when UseAPX and supports_avx10_2() both hold
6199
6200 format %{ "" %}
6201 interface(COND_INTER) %{
6202 equal(0x4, "e");
6203 not_equal(0x5, "ne");
6204 less(0x2, "b");
6205 greater_equal(0x3, "ae");
6206 less_equal(0x6, "be");
6207 greater(0x7, "a");
6208 overflow(0x0, "o");
6209 no_overflow(0x1, "no");
6210 %}
6211 %}
6212
6213 // Operands for bound floating point register arguments: VecX value restricted to the xmm0_reg class
6214 operand rxmm0() %{
6215 constraint(ALLOC_IN_RC(xmm0_reg));
6216 match(VecX);
6217 format%{%}
6218 interface(REG_INTER);
6219 %}
6220
6221 // Vectors
6222
6223 // Dummy generic vector class. Should be used for all vector operands.
6224 // Replaced with vec[SDXYZ] during post-selection pass.
6225 operand vec() %{
6226 constraint(ALLOC_IN_RC(dynamic)); // placeholder class; the size-specific operands carry the concrete classes
6227 match(VecX); // matches all five vector ideal types: VecX, VecY, VecZ, VecS, VecD
6228 match(VecY);
6229 match(VecZ);
6230 match(VecS);
6231 match(VecD);
6232
6233 format %{ %}
6234 interface(REG_INTER);
6235 %}
6236
6237 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6238 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6239 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6240 // runtime code generation via reg_class_dynamic.
6241 operand legVec() %{
6242 constraint(ALLOC_IN_RC(dynamic)); // same constraint and match list as vec; a distinct operand type
6243 match(VecX);
6244 match(VecY);
6245 match(VecZ);
6246 match(VecS);
6247 match(VecD);
6248
6249 format %{ %}
6250 interface(REG_INTER);
6251 %}
6252
6253 // Replaces vec during post-selection cleanup. See above. VecS value in vectors_reg_vlbwdq.
6254 operand vecS() %{
6255 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6256 match(VecS);
6257
6258 format %{ %}
6259 interface(REG_INTER);
6260 %}
6261
6262 // Replaces legVec during post-selection cleanup. See above. VecS value in vectors_reg_legacy.
6263 operand legVecS() %{
6264 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6265 match(VecS);
6266
6267 format %{ %}
6268 interface(REG_INTER);
6269 %}
6270
6271 // Replaces vec during post-selection cleanup. See above. VecD value in vectord_reg_vlbwdq.
6272 operand vecD() %{
6273 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6274 match(VecD);
6275
6276 format %{ %}
6277 interface(REG_INTER);
6278 %}
6279
6280 // Replaces legVec during post-selection cleanup. See above. VecD value in vectord_reg_legacy.
6281 operand legVecD() %{
6282 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6283 match(VecD);
6284
6285 format %{ %}
6286 interface(REG_INTER);
6287 %}
6288
6289 // Replaces vec during post-selection cleanup. See above. VecX value in vectorx_reg_vlbwdq.
6290 operand vecX() %{
6291 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6292 match(VecX);
6293
6294 format %{ %}
6295 interface(REG_INTER);
6296 %}
6297
6298 // Replaces legVec during post-selection cleanup. See above. VecX value in vectorx_reg_legacy.
6299 operand legVecX() %{
6300 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6301 match(VecX);
6302
6303 format %{ %}
6304 interface(REG_INTER);
6305 %}
6306
6307 // Replaces vec during post-selection cleanup. See above. VecY value in vectory_reg_vlbwdq.
6308 operand vecY() %{
6309 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6310 match(VecY);
6311
6312 format %{ %}
6313 interface(REG_INTER);
6314 %}
6315
6316 // Replaces legVec during post-selection cleanup. See above. VecY value in vectory_reg_legacy.
6317 operand legVecY() %{
6318 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6319 match(VecY);
6320
6321 format %{ %}
6322 interface(REG_INTER);
6323 %}
6324
6325 // Replaces vec during post-selection cleanup. See above. VecZ value in vectorz_reg.
6326 operand vecZ() %{
6327 constraint(ALLOC_IN_RC(vectorz_reg));
6328 match(VecZ);
6329
6330 format %{ %}
6331 interface(REG_INTER);
6332 %}
6333
6334 // Replaces legVec during post-selection cleanup. See above. VecZ value in vectorz_reg_legacy.
6335 operand legVecZ() %{
6336 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6337 match(VecZ);
6338
6339 format %{ %}
6340 interface(REG_INTER);
6341 %}
6342
6343 //----------OPERAND CLASSES----------------------------------------------------
6344 // Operand Classes are groups of operands that are used to simplify
6345 // instruction definitions by not requiring the AD writer to specify separate
6346 // instructions for every form of operand when the instruction accepts
6347 // multiple operand types with the same basic encoding and format. The classic
6348 // case of this is memory operands. The "memory" class groups every ind* addressing operand.
6349
6350 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6351 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6352 indCompressedOop, indCompressedOopOffset,
6353 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6354 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6355 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6356
6357 //----------PIPELINE-----------------------------------------------------------
6358 // Rules which define the behavior of the target architecture's pipeline.
6359 pipeline %{
6360
6361 //----------ATTRIBUTES---------------------------------------------------------
6362 attributes %{
6363 variable_size_instructions; // Instructions are variable size
6364 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6365 instruction_unit_size = 1; // Instruction sizes are counted in units of 1 byte
6366 instruction_fetch_unit_size = 16; // The processor fetches one line
6367 instruction_fetch_units = 1; // of 16 bytes
6368 %}
6369
6370 //----------RESOURCES----------------------------------------------------------
6371 // Resources are the functional units available to the machine
6372
6373 // Generic P2/P3 pipeline
6374 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6375 // 3 instructions decoded per cycle.
6376 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below - confirm), 1 branch, 1 FPU,
6377 // 3 ALU op, only ALU0 handles mul instructions.
6378 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6379 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6380 BR, FPU,
6381 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6382
6383 //----------PIPELINE DESCRIPTION-----------------------------------------------
6384 // Pipeline Description specifies the stages in the machine's pipeline
6385
// Generic P2/P3 pipeline: six stages, S0 through S5. The pipe classes below
// refer to these names when stating where operands and resources are used.
pipe_desc(S0, S1, S2, S3, S4, S5);
6388
6389 //----------PIPELINE CLASSES---------------------------------------------------
6390 // Pipeline Classes describe the stages in which input and output are
6391 // referenced by the hardware pipeline.
6392
6393 // Naming convention: ialu or fpu
6394 // Then: _reg
6395 // Then: _reg if there is a 2nd register
6396 // Then: _long if it's a pair of instructions implementing a long
6397 // Then: _fat if it requires the big decoder
6398 // Or: _mem if it requires the big decoder and a memory unit.
6399
// Integer ALU reg operation
// Single instruction; dst is read in S3 and written in S4.
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6409
// Long ALU reg operation
// Modeled as two instructions; dst is read in S3 and written in S4.
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any decoder, count 2
    ALU    : S3(2);     // any alu, count 2 (three ALUs are declared)
%}
6419
// Integer ALU reg operation using big decoder
// Same operand timing as ialu_reg, but decoding is restricted to D0.
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
6429
// Integer ALU reg-reg operation
// Single instruction; src is read in S3, dst is written in S4.
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6439
// Integer ALU operation using big decoder
// Despite the "reg_reg" in the name, src is declared as a memory operand.
// No MEM resource is claimed; only D0 and an ALU are used.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
6449
// Integer ALU reg-mem operation
// Single instruction; mem is read in S3 and dst is written in S5, one stage
// later than in the register-only classes.
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6460
// Integer mem operation (prefetch)
// Only a memory operand is involved; no ALU resource is claimed.
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}
6469
// Integer Store to Memory
// Single instruction; the address operand is read in S3 and the stored
// register in S5. No operand is marked as written.
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6480
6481 // // Long Store to Memory
6482 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6483 // %{
6484 // instruction_count(2);
6485 // mem : S3(read);
6486 // src : S5(read);
6487 // D0 : S0(2); // big decoder only; twice
6488 // ALU : S4(2); // any 2 alus
6489 // MEM : S3(2); // Both mems
6490 // %}
6491
// Integer Store to Memory
// Same as ialu_mem_reg but without a register source operand.
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6501
// Integer ALU0 reg-reg operation
// Like ialu_reg_reg, but pinned to the big decoder and to ALU0.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}
6511
// Integer ALU0 reg-mem operation
// Like ialu_reg_mem, but the ALU work is pinned to ALU0.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}
6522
// Integer ALU reg-reg operation producing flags
// Both sources are read in S3; the flags register cr is written in S4.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6533
// Integer ALU reg-imm operation producing flags
// Only the register source is modeled; cr is written in S4.
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6543
// Integer ALU reg-mem operation producing flags
// Register and memory sources are read in S3; cr is written in S4.
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6555
// Compare-less-than idiom, modeled as a four-instruction sequence
// All three operands are only read (p and q in S3, y in S4); no operand is
// marked as written and only decoder resources are claimed.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}
6565
// Conditional move reg-reg
// src and the flags are read in S3; dst is written in S4. Only a decoder is
// claimed.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
6575
// Conditional move reg-mem
// Note the parameter order: the flags register comes first here, unlike
// pipe_cmov_reg.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;        // any mem
%}
6586
// Conditional move reg-reg long
// Declared as a single instruction, yet DECODE is claimed with count 2.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}
6596
// Float reg operation
// Two instructions; the single operand is only marked as read (S3).
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
6605
// Float reg-reg operation
// Two instructions; src is read in S3 and dst is written in S4.
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
6615
// Float reg-reg-reg operation
// Three instructions; both sources are read in S3, dst is written in S4.
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}
6626
// Float operation with three register sources
// Four instructions; all sources are read in S3, dst is written in S4.
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any decoder, count 4 (matches instruction_count)
    FPU    : S3(2);
%}
6638
// Float operation with one memory and two register sources
// Four instructions: D0 is claimed in S0 and DECODE (count 3) in S1.
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}
6652
// Float reg-mem operation
// Two instructions; mem is read in S3 and dst is written in S5.
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6664
// Float reg-reg-mem operation
// Three instructions; src1 and mem are read in S3, dst is written in S5.
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6677
// Float mem-reg operation
// Two instructions; the address is read in S3 and the register source in S5.
// Here DECODE is claimed in S0 and D0 in S1, the reverse of fpu_reg_mem.
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6689
// Float mem-reg-reg operation
// Three instructions; all operands are read in S3 and none is marked written.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6701
// Float mem-reg-mem operation
// Three instructions; src1 and src2 are read in S3, mem in S4. MEM is
// claimed with count 2.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
6713
// Float mem-mem operation
// Two instructions; src1 is read in S3 and dst in S4. No FPU resource is
// claimed, only the big decoder and memory units.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}
6722
// Float mem-mem-mem operation
// Three instructions; both sources are read in S3 and dst in S4.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}
6733
// Float mem-reg operation involving a constant
// Three instructions; src1 and mem are both read in S4.
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
6744
// Float load constant
// Two instructions; dst is written in S5 and a memory unit is claimed in S3.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6755
// Float load constant combined with a register source
// Three instructions; src is read in S3 and dst is written in S5.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6767
// Unconditional branch
// Only the branch unit is claimed (S3); the label operand has no timing.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}
6774
// Conditional branch
// The flags register is read in S1; the branch unit is claimed in S3.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}
6782
// Allocation idiom
// Serializing class with a fixed latency of 6. heap_ptr is read in S3 and
// dst is written in S5; decoders, a memory unit, ALUs and the branch unit
// are all claimed.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
6796
// Generic big/slow expanded idiom
// Catch-all class with no operands: modeled as 10 instructions spanning
// multiple bundles, serializing, with a fixed latency of 100.
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
6805
// The real do-nothing guy
// Zero instructions, no operands and no resources.
pipe_class empty()
%{
    instruction_count(0);
%}
6811
// Define the class for the Nop node
// MachNop is bound to the "empty" pipe class declared in this pipeline.
define
%{
   MachNop = empty;
%}
6817
6818 %}
6819
6820 //----------INSTRUCTIONS-------------------------------------------------------
6821 //
6822 // match -- States which machine-independent subtree may be replaced
6823 // by this instruction.
6824 // ins_cost -- The estimated cost of this instruction is used by instruction
6825 // selection to identify a minimum cost tree of machine
6826 // instructions that matches a tree of machine-independent
6827 // instructions.
6828 // format -- A string providing the disassembly for this instruction.
6829 // The value of an instruction's operand may be inserted
6830 // by referring to it with a '$' prefix.
6831 // opcode -- Three instruction opcodes may be provided. These are referred
6832 // to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6834 // indicate the type of machine instruction, while secondary
6835 // and tertiary are often used for prefix options or addressing
6836 // modes.
6837 // ins_encode -- A list of encode classes with parameters. The encode class
6838 // name must have been defined in an 'enc_class' specification
6839 // in the encode section of the architecture description.
6840
6841 // ============================================================================
6842
// Halt node. When the node is reachable, emits a stop whose message is the
// code string built from the recorded halt reason; otherwise emits nothing.
instruct ShouldNotReachHere()
%{
  match(Halt);

  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(__ code_string(_halt_reason));
    }
  %}
  ins_pipe(pipe_slow);
%}
6854
6855 // ============================================================================
6856
6857 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Dummy float move: regF -> vlRegF.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveF2VL(vlRegF dst, regF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6867
// Dummy float move: regF -> legRegF.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveF2LEG(legRegF dst, regF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6877
// Dummy float move: vlRegF -> regF.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveVL2F(regF dst, vlRegF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6887
// Dummy float move: legRegF -> regF.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveLEG2F(regF dst, legRegF src)
%{
  match(Set dst src);

  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6897
// Dummy double move: regD -> vlRegD.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveD2VL(vlRegD dst, regD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6907
// Dummy double move: regD -> legRegD.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveD2LEG(legRegD dst, regD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6917
// Dummy double move: vlRegD -> regD.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveVL2D(regD dst, vlRegD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6927
// Dummy double move: legRegD -> regD.
// Placeholder that is expected to be removed before code emission, hence the
// encoding only traps.
instruct MoveLEG2D(regD dst, legRegD src)
%{
  match(Set dst src);

  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    // Must never be reached: this node is not supposed to be emitted.
    ShouldNotReachHere();
  %}
  ins_pipe(fpu_reg_reg);
%}
6937
6938 //----------Load/Store/Move Instructions---------------------------------------
6939 //----------Load Instructions--------------------------------------------------
6940
// LoadB: signed 8-bit load, sign-extended into a 32-bit int register.
instruct loadB(rRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  format %{ "movsbl $dst, $mem\t# byte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movsbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
6955
// ConvI2L(LoadB): signed 8-bit load sign-extended straight to 64 bits,
// folding the int-to-long conversion into the load.
instruct loadB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadB mem)));

  format %{ "movsbq $dst, $mem\t# byte -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movsbq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
6970
// LoadUB: unsigned 8-bit load, zero-extended into a 32-bit int register.
instruct loadUB(rRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  format %{ "movzbl $dst, $mem\t# ubyte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
6985
// ConvI2L(LoadUB): unsigned 8-bit load zero-extended straight to 64 bits.
instruct loadUB2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));

  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7000
// ConvI2L(AndI(LoadUB, mask)) with an arbitrary 32-bit mask.
// The byte is zero-extended first, then ANDed with the low 8 bits of the
// mask. The AND clobbers the flags, hence KILL cr.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbq(result, src_mem);
    __ andl(result, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
7015
// LoadS: signed 16-bit load, sign-extended into a 32-bit int register.
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  format %{ "movswl $dst, $mem\t# short" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movswl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7030
// (LoadS << 24) >> 24: narrowing a loaded short to a signed byte.
// Replaced by a single sign-extending byte load from the same address.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movsbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7042
// ConvI2L(LoadS): signed 16-bit load sign-extended straight to 64 bits.
instruct loadS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadS mem)));

  format %{ "movswq $dst, $mem\t# short -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movswq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7057
// LoadUS: unsigned 16-bit (char) load, zero-extended into an int register.
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  format %{ "movzwl $dst, $mem\t# ushort/char" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzwl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7072
// (LoadUS << 24) >> 24: narrowing a loaded char to a signed byte.
// Replaced by a single sign-extending byte load from the same address.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movsbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7084
// ConvI2L(LoadUS): unsigned 16-bit load zero-extended straight to 64 bits.
instruct loadUS2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));

  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzwq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7099
// ConvI2L(AndI(LoadUS, 0xFF)): only the low byte survives the mask, so a
// zero-extending byte load from the same address replaces load + and.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7110
// ConvI2L(AndI(LoadUS, mask)) with an arbitrary 32-bit mask.
// The char is zero-extended first, then ANDed with the low 16 bits of the
// mask. The AND clobbers the flags, hence KILL cr.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzwq(result, src_mem);
    __ andl(result, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7125
// LoadI: plain 32-bit load into an int register.
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  format %{ "movl $dst, $mem\t# int" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7140
// (LoadI << 24) >> 24: narrowing a loaded int to a signed byte.
// Replaced by a single sign-extending byte load from the same address.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movsbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7152
// AndI(LoadI, 0xFF): narrowing a loaded int to an unsigned byte.
// Replaced by a single zero-extending byte load from the same address.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));

  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7164
// (LoadI << 16) >> 16: narrowing a loaded int to a signed short.
// Replaced by a single sign-extending 16-bit load from the same address.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movswl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7176
// AndI(LoadI, 0xFFFF): narrowing a loaded int to an unsigned short/char.
// Replaced by a single zero-extending 16-bit load from the same address.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask)
%{
  match(Set dst (AndI (LoadI mem) mask));

  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzwl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7188
// ConvI2L(LoadI): 32-bit load sign-extended straight to 64 bits.
instruct loadI2L(rRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));

  format %{ "movslq $dst, $mem\t# int -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movslq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7203
// ConvI2L(AndI(LoadI, 0xFF)): only the low byte survives the mask, so a
// zero-extending byte load from the same address replaces load + and.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzbq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7214
// ConvI2L(AndI(LoadI, 0xFFFF)): only the low 16 bits survive the mask, so a
// zero-extending 16-bit load from the same address replaces load + and.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movzwq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7225
// ConvI2L(AndI(LoadI, mask)) where mask is an immU31 operand.
// Emitted as a 32-bit load followed by a 32-bit AND with the mask; the AND
// clobbers the flags, hence KILL cr.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(result, src_mem);
    __ andl(result, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7240
// AndL(ConvI2L(LoadI), 32-bit mask): an unsigned int widened to long.
// A plain 32-bit movl is emitted; on x86-64 a 32-bit register write clears
// the upper half of the 64-bit register, so no explicit AND is needed.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  format %{ "movl $dst, $mem\t# uint -> long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7255
// LoadL: plain 64-bit load into a long register.
instruct loadL(rRegL dst, memory mem) %{
  match(Set dst (LoadL mem));

  format %{ "movq $dst, $mem\t# long" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7270
// LoadRange: 32-bit load into an int register.
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  format %{ "movl $dst, $mem\t# range" %}
  ins_cost(125); // XXX
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
7283
// LoadP: 64-bit pointer load. Selected only when the load node carries no
// barrier data (barrier_data() == 0).
instruct loadP(rRegP dst, memory mem) %{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  format %{ "movq $dst, $mem\t# ptr" %}
  ins_cost(125); // XXX
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7297
// LoadN: 32-bit compressed pointer load. Selected only when the load node
// carries no barrier data (barrier_data() == 0).
instruct loadN(rRegN dst, memory mem) %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_cost(125); // XXX
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7311
7312
// LoadKlass: 64-bit load of a klass pointer.
instruct loadKlass(rRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  format %{ "movq $dst, $mem\t# class" %}
  ins_cost(125); // XXX
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movq(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7325
// LoadNKlass without compact object headers: plain 32-bit load of the
// narrow klass pointer. The UseCompactObjectHeaders case is handled by
// loadNKlassCompactHeaders.
instruct loadNKlass(rRegN dst, memory mem) %{
  predicate(!UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));

  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_cost(125); // XXX
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $mem$$Address;
    __ movl(dst_reg, src_mem);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7339
// LoadNKlass with compact object headers: the loaded 32-bit value is
// shifted right by markWord::klass_shift_at_offset to obtain the narrow
// klass. The shift clobbers the flags, hence KILL cr.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr) %{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);

  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    const Register klass_reg = $dst$$Register;
    if (!UseAPX) {
      // Classic two-instruction form: load, then shift in place.
      __ movl(klass_reg, $mem$$Address);
      __ shrl(klass_reg, markWord::klass_shift_at_offset);
    } else {
      // With APX a single eshrl takes the memory operand directly.
      __ eshrl(klass_reg, $mem$$Address, markWord::klass_shift_at_offset, false);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
7361
// LoadF: 32-bit float load into an XMM register via movflt.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));

  format %{ "movss $dst, $mem\t# float" %}
  ins_cost(145); // XXX
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $mem$$Address;
    __ movflt(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7374
// LoadD when UseXmmLoadAndClearUpper is off. The encoding calls movdbl, the
// same helper as loadD; only the predicate and the format text differ.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  format %{ "movlpd $dst, $mem\t# double" %}
  ins_cost(145); // XXX
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $mem$$Address;
    __ movdbl(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7388
// LoadD when UseXmmLoadAndClearUpper is on: 64-bit double load into an XMM
// register via movdbl.
instruct loadD(regD dst, memory mem) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  format %{ "movsd $dst, $mem\t# double" %}
  ins_cost(145); // XXX
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $mem$$Address;
    __ movdbl(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7401
// Materializes an AOT runtime-constants address constant into a pointer
// register through load_aotrc_address.
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con) %{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ load_aotrc_address(dst_reg, (address)$con$$constant);
  %}
  ins_pipe(ialu_reg_fat);
%}
7414
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX10.2 variant for non-reduction nodes: the ideal opcode (MinF or MaxF)
// is forwarded to sminmax_fp_avx10_2, with k0 passed as the mask register.
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));

  format %{ "minmaxF $dst, $a, $b" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister a_xmm   = $a$$XMMRegister;
    const XMMRegister b_xmm   = $b$$XMMRegister;
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_FLOAT, dst_xmm, k0, a_xmm, b_xmm);
  %}
  ins_pipe(pipe_slow);
%}
7430
// Float min/max for reduction nodes when AVX10.2 is available.
// Delegates to emit_fp_min_max with an integer temp register; flags are
// declared killed.
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // True for MinF, false for MaxF.
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7447
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX variant (no AVX10.2) for non-reduction nodes. The scalar opcode is
// mapped to Op_MinV / Op_MaxV and handed to vminmax_fp at 128-bit vector
// length, using three XMM temporaries.
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);

  format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    const bool is_min        = (this->ideal_Opcode() == Op_MinF);
    const int  vector_opcode = is_min ? Op_MinV : Op_MaxV;
    __ vminmax_fp(vector_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7466
// Float min/max for reduction nodes on AVX without AVX10.2.
// Delegates to emit_fp_min_max with an integer temp register; flags are
// declared killed.
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  // Format text now reads "! using", matching the sibling reduction
  // instructs (it previously lacked the space after '!').
  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // True for MinF, false for MaxF.
    const bool min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    min, fp_prec_flt /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7483
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX10.2 variant for non-reduction nodes: the ideal opcode (MinD or MaxD)
// is forwarded to sminmax_fp_avx10_2, with k0 passed as the mask register.
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));

  format %{ "minmaxD $dst, $a, $b" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister a_xmm   = $a$$XMMRegister;
    const XMMRegister b_xmm   = $b$$XMMRegister;
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_DOUBLE, dst_xmm, k0, a_xmm, b_xmm);
  %}
  ins_pipe(pipe_slow);
%}
7499
// Double min/max for reduction nodes when AVX10.2 is available.
// Delegates to emit_fp_min_max with a general-purpose temp register; flags
// are declared killed.
// NOTE(review): rtmp is declared rRegI here, while the non-AVX10.2 sibling
// minmaxD_reduction_reg declares it rRegL -- confirm which is intended.
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // True for MinD, false for MaxD.
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe(pipe_slow);
%}
7516
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX variant (no AVX10.2) for non-reduction nodes. The scalar opcode is
// mapped to Op_MinV / Op_MaxV and handed to vminmax_fp at 128-bit vector
// length, using three XMM temporaries.
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);

  format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    const bool is_min        = (this->ideal_Opcode() == Op_MinD);
    const int  vector_opcode = is_min ? Op_MinV : Op_MaxV;
    __ vminmax_fp(vector_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
7535
// Scalar double min/max when the node is a loop reduction, for AVX targets
// without AVX10.2. A single rule covers both MinD and MaxD; the ideal opcode
// of the matched node selects the direction passed to emit_fp_min_max.
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // true => MinD, false => MaxD
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7552
// Load Effective Address
// Materializes the address described by an indOffset8 memory operand into a
// pointer register with a single leaq.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7565
// Load Effective Address for an indOffset32 memory operand: the address is
// computed into a pointer register with a single leaq.
instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7577
// Load Effective Address for an indIndexOffset memory operand: the address is
// computed into a pointer register with a single leaq.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7589
// Load Effective Address for an indIndexScale memory operand: the address is
// computed into a pointer register with a single leaq.
instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7601
// Load Effective Address for an indPosIndexScale memory operand: the address
// is computed into a pointer register with a single leaq.
// Note: the format string uses the same "idxscale" tag as leaPIdxScale.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7613
// Load Effective Address for an indIndexScaleOffset memory operand: the
// address is computed into a pointer register with a single leaq.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7625
// Load Effective Address for an indPosIndexOffset memory operand: the address
// is computed into a pointer register with a single leaq.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7637
// Load Effective Address for an indPosIndexScaleOffset memory operand: the
// address is computed into a pointer register with a single leaq.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7649
// Load Effective Address which uses Narrow (32-bits) oop
// Only selected when compressed oops are enabled with a non-zero shift; the
// indCompressedOopOffset operand (defined elsewhere) supplies the address form.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7663
// Load Effective Address for an indOffset8Narrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7676
// Load Effective Address for an indOffset32Narrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7689
// Load Effective Address for an indIndexOffsetNarrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7702
// Load Effective Address for an indIndexScaleNarrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7715
// Load Effective Address for an indIndexScaleOffsetNarrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7728
// Load Effective Address for an indPosIndexOffsetNarrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7741
// Load Effective Address for an indPosIndexScaleOffsetNarrow memory operand.
// Only selected when the compressed-oop shift is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7754
// Load an int constant (immI) into an int register with movl.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7765
// Load the int constant zero (immI_0) by xor-ing the destination with itself.
// Cheaper (ins_cost 50) than an immediate move, but the flags register is
// declared killed.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7778
// Load an arbitrary long constant (immL) into a long register using mov64.
// This is the most expensive long-constant rule in this group (ins_cost 150).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7790
// Load the long constant zero (immL0) by xor-ing the destination with itself.
// A 32-bit xorl is emitted (32-bit register writes zero-extend on x86-64);
// the flags register is declared killed.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}
7803
// Load a long constant matched by the immUL32 operand (per the format string:
// an unsigned 32-bit value) using a 32-bit movl.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7815
// Load a long constant matched by the immL32 operand (per the format string:
// a 32-bit value) using movq with an immediate.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7827
// Load a pointer constant (immP) with mov64, passing along the constant's
// relocation type and the RELOC_IMM64 format.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7837
// Load the null pointer constant (immP0) by xor-ing the destination with
// itself; the flags register is declared killed.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7850
// Load a pointer constant matched by the immP31 operand (per the format
// string: a positive 32-bit value) using a 32-bit movl.
// The rule also declares the flags register as killed.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7863
// Load a float constant (immF) from the constant table into an XMM register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7873
// Load a half-float constant (immH) from the constant table into an XMM
// register; emitted with the same movflt as loadConF.
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7883
// Load the compressed null pointer (immN0) by xor-ing the destination with
// itself; the flags register is declared killed.
// The format string names $dst for both operands so the disassembly listing
// matches the instruction that is actually emitted.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7893
// Load a non-null compressed oop constant (immN) via set_narrow_oop.
// A null constant is not expected to reach this rule and trips
// ShouldNotReachHere().
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7909
// Load a non-null compressed klass constant (immNKlass) via set_narrow_klass.
// A null constant is not expected to reach this rule and trips
// ShouldNotReachHere().
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address klass_con = (address)$src$$constant;
    if (klass_con != nullptr) {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7925
// Load float 0.0 (immF0) by xor-ing the XMM destination with itself (xorps),
// avoiding a constant-table load.
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7937
// Load a double constant (immD) from the constant table into an XMM register.
// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7948
// Load double 0.0 (immD0) by xor-ing the XMM destination with itself (xorpd),
// avoiding a constant-table load.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7960
// Load an int from a stack slot into an int register.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7972
// Load a long from a stack slot into a long register.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7984
// Load a pointer from a stack slot into a pointer register.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7996
// Load a float from a stack slot into an XMM register. The address is built
// explicitly as rsp plus the slot's displacement.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8008
// Load a double from a stack slot into an XMM register. The address is built
// explicitly as rsp plus the slot's displacement.
// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8021
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one of the prefetchAlloc* rules is selected by the value of
// AllocatePrefetchInstr.

// AllocatePrefetchInstr == 3: emit prefetchw.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8036
// Allocation prefetch variant selected when AllocatePrefetchInstr == 0:
// emit prefetchnta.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8048
// Allocation prefetch variant selected when AllocatePrefetchInstr == 1:
// emit prefetcht0.
instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8060
// Allocation prefetch variant selected when AllocatePrefetchInstr == 2:
// emit prefetcht2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8072
8073 //----------Store Instructions-------------------------------------------------
8074
// Store Byte
// Writes the low byte of an int register to memory with movb.
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8087
// Store Char/Short
// Writes the low 16 bits of an int register to memory with movw.
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8100
// Store Integer
// Writes an int register to memory with movl.
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8113
// Store Long
// Writes a long register to memory with movq.
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
8126
// Store Pointer
// Plain pointer store with movq. Only matches StoreP nodes whose barrier data
// is zero; stores carrying barrier data are left to other rules.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8140
// Store a null pointer by writing r12 instead of an immediate. Per the format
// string r12 holds the heap base, which the predicate requires to be null
// (compressed oops enabled with a null base). Also requires zero barrier data.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8153
// Store Null Pointer, mark word, or other simple pointer constant.
// The constant must match the immP31 operand and is stored with a movq
// immediate. Only matches StoreP nodes whose barrier data is zero.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8167
// Store Compressed Pointer
// Writes a narrow-oop register to memory with a 32-bit movl. Only matches
// StoreN nodes whose barrier data is zero.
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8181
// Store a compressed klass pointer from a narrow register with a 32-bit movl.
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8193
// Store a compressed null pointer by writing the low 32 bits of r12 instead
// of an immediate. Per the format string r12 holds the heap base, which the
// predicate requires to be null. Also requires zero barrier data.
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8206
// Store a compressed oop constant (immN) directly to memory. A non-null
// constant goes through set_narrow_oop; a null constant is stored as a plain
// 32-bit zero. Only matches StoreN nodes whose barrier data is zero.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, 0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
8224
// Store a compressed klass constant (immNKlass) directly to memory via
// set_narrow_klass.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8236
// Store Integer Immediate
// Zero case: stores the low 32 bits of r12 instead of an immediate. Per the
// format string r12 holds the heap base, which the predicate requires to be
// null (compressed oops enabled with a null base).
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8250
// Store an int constant (immI) directly to memory with a movl immediate.
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8262
// Store Long Immediate
// Zero case: stores r12 instead of an immediate. Per the format string r12
// holds the heap base, which the predicate requires to be null (compressed
// oops enabled with a null base).
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8276
// Store a long constant directly to memory with a movq immediate. Only
// constants matching the immL32 operand qualify.
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8288
// Store Short/Char Immediate
// Zero case: stores the low 16 bits of r12 instead of an immediate. Per the
// format string r12 holds the heap base, which the predicate requires to be
// null (compressed oops enabled with a null base).
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8302
// Store a 16-bit constant (immI16) directly to memory with a movw immediate.
// Only enabled when the UseStoreImmI16 flag is set.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8315
// Store Byte Immediate
// Zero case: stores the low byte of r12 instead of an immediate. Per the
// format string r12 holds the heap base, which the predicate requires to be
// null (compressed oops enabled with a null base).
// The format string tags the store as a byte, matching the movb that is
// emitted and the StoreB node that is matched.
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8329
// Store an 8-bit constant (immI8) directly to memory with a movb immediate.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8341
// Store Float
// Writes a float held in an XMM register to memory via movflt.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8354
// Store immediate Float value (it is faster than store from XMM register)
// Zero case: stores the low 32 bits of r12 instead of an immediate. Per the
// format string r12 holds the heap base, which the predicate requires to be
// null (compressed oops enabled with a null base).
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8368
// Store a float constant (immF) directly to memory: the constant is
// converted with jint_cast and written with a 32-bit movl immediate.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
8380
// Store Double
// Writes a double held in an XMM register to memory via movdbl.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8393
// Store immediate double 0.0 (it is faster than store from XMM register)
// Used when compressed oops are off or the heap base is non-null, i.e. the
// complement of storeD0's predicate; the zero is written as a movq immediate.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8407
// Store double 0.0 by writing r12 instead of an immediate. Per the format
// string r12 holds the heap base, which the predicate requires to be null
// (compressed oops enabled with a null base).
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8420
// Store an int register into a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Address, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
8432
// Store a long register into a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8444
// Store a pointer register into a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8456
// Store a float from an XMM register into a stack slot. The address is built
// explicitly as rsp plus the slot's displacement.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8468
// Store a double from an XMM register into a stack slot. The address is built
// explicitly as rsp plus the slot's displacement.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8480
// Cache line write-back for the line addressed by a plain base register.
// Only available when the CPU supports data cache line flush.
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    // The operand must be a bare [base] address: no index, no displacement.
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8495
// Synchronization emitted before a sequence of cache write-backs; calls
// cache_wbsync with its argument set to true (the "pre" case).
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8508
// Synchronization emitted after a sequence of cache write-backs; calls
// cache_wbsync with its argument set to false (the "post" case).
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8521
8522 //----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of an int in place with bswapl.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}
8532
// Reverse the byte order of a long in place with bswapq.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}
8542
// Reverse the bytes of an unsigned 16-bit value in place: swap all four bytes
// of the 32-bit register, then shift right logically by 16 so the swapped
// half lands in the low bits with zero fill.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
    __ shrl(value, 16);
  %}
  ins_pipe( ialu_reg );
%}
8555
// Reverse the bytes of a signed 16-bit value in place: swap all four bytes of
// the 32-bit register, then shift right arithmetically by 16 so the swapped
// half lands in the low bits with sign fill.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sar $dst,16\n\t" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
    __ sarl(value, 16);
  %}
  ins_pipe( ialu_reg );
%}
8568
8569 //---------- Zeros Count Instructions ------------------------------------------
8570
// Count leading zeros of an int with a single lzcntl. Selected when
// UseCountLeadingZerosInstruction is set; flags are declared killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8582
// Count leading zeros of an int loaded from memory: the LoadI is folded into
// the lzcntl memory operand. Selected when UseCountLeadingZerosInstruction
// is set; flags are declared killed.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8594
// Count leading zeros of an int without lzcnt. bsrl yields the index of the
// highest set bit; the result is (BitsPerInt - 1) - index. When the
// not-zero branch is not taken (zero source), the index is forced to -1 so the
// same arithmetic produces BitsPerInt.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Label have_index;
    __ bsrl(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, -1);              // zero source: pretend index is -1
    __ bind(have_index);
    __ negl(result);                  // result = -index
    __ addl(result, BitsPerInt - 1);  // result = (BitsPerInt - 1) - index
  %}
  ins_pipe(ialu_reg);
%}
8619
// Count leading zeros of a long with a single lzcntq; the result is an int.
// Selected when UseCountLeadingZerosInstruction is set; flags are declared
// killed.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8631
// Count leading zeros of a long loaded from memory: the LoadL is folded into
// the lzcntq memory operand. Selected when UseCountLeadingZerosInstruction
// is set; flags are declared killed.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8643
// Count leading zeros of a long without lzcnt. bsrq yields the index of the
// highest set bit; the result is (BitsPerLong - 1) - index. When the
// not-zero branch is not taken (zero source), the index is forced to -1 so the
// same arithmetic produces BitsPerLong.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Label have_index;
    __ bsrq(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, -1);               // zero source: pretend index is -1
    __ bind(have_index);
    __ negl(result);                   // result = -index
    __ addl(result, BitsPerLong - 1);  // result = (BitsPerLong - 1) - index
  %}
  ins_pipe(ialu_reg);
%}
8668
// Count trailing zeros of an int with a single tzcntl. Selected when
// UseCountTrailingZerosInstruction is set; flags are declared killed.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8680
// Count trailing zeros of an int loaded from memory: the LoadI is folded into
// the tzcntl memory operand. Selected when UseCountTrailingZerosInstruction
// is set; flags are declared killed.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8692
// Count trailing zeros of an int without tzcnt. bsfl yields the index of the
// lowest set bit, which is the answer directly. When the not-zero branch is
// not taken (zero source), the result is set to BitsPerInt.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Label have_index;
    __ bsfl(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, BitsPerInt);  // zero source: every bit is a trailing zero
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8712
// CountTrailingZerosL, register source, when UseCountTrailingZerosInstruction
// is on: a single tzcntq. The result operand is an int register.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(UseCountTrailingZerosInstruction);
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ tzcntq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
8724
// CountTrailingZerosL with the LoadL folded into the tzcntq memory operand.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL (LoadL src)));
  predicate(UseCountTrailingZerosInstruction);
  ins_cost(175);
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ tzcntq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8736
// CountTrailingZerosL fallback, selected when UseCountTrailingZerosInstruction
// is off. bsfq is followed by a fix-up that stores BitsPerLong into dst when
// the zero flag is set after bsfq.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  predicate(!UseCountTrailingZerosInstruction);
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    Label finished;
    __ bsfq(result, value);
    __ jccb(Assembler::notZero, finished);
    // Zero flag set (branch not taken): the answer is the full bit width.
    __ movl(result, BitsPerLong);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
8756
8757 //--------------- Reverse Operation Instructions ----------------
// ReverseI when GFNI is not supported: reverseI is called with xnoreg for both
// XMM temporaries and one general purpose temporary.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  match(Set dst (ReverseI src));
  predicate(!VM_Version::supports_gfni());
  effect(TEMP dst, TEMP rtmp, KILL cr);

  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    const Register result  = $dst$$Register;
    const Register value   = $src$$Register;
    const Register scratch = $rtmp$$Register;
    __ reverseI(result, value, xnoreg, xnoreg, scratch);
  %}
  ins_pipe(ialu_reg);
%}
8768
// ReverseI when GFNI is supported: reverseI additionally receives two XMM
// temporaries.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  match(Set dst (ReverseI src));
  predicate(VM_Version::supports_gfni());
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);

  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register    result   = $dst$$Register;
    const Register    value    = $src$$Register;
    const XMMRegister vscratch1 = $xtmp1$$XMMRegister;
    const XMMRegister vscratch2 = $xtmp2$$XMMRegister;
    __ reverseI(result, value, vscratch1, vscratch2, $rtmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8779
// ReverseL when GFNI is not supported: reverseL is called with xnoreg for both
// XMM temporaries and two general purpose temporaries.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (ReverseL src));
  predicate(!VM_Version::supports_gfni());
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);

  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register value    = $src$$Register;
    const Register scratch1 = $rtmp1$$Register;
    const Register scratch2 = $rtmp2$$Register;
    __ reverseL(result, value, xnoreg, xnoreg, scratch1, scratch2);
  %}
  ins_pipe(ialu_reg);
%}
8790
// ReverseL when GFNI is supported: reverseL receives two XMM temporaries, one
// general purpose temporary, and noreg for the second one.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  match(Set dst (ReverseL src));
  predicate(VM_Version::supports_gfni());
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);

  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register    result    = $dst$$Register;
    const Register    value     = $src$$Register;
    const XMMRegister vscratch1 = $xtmp1$$XMMRegister;
    const XMMRegister vscratch2 = $xtmp2$$XMMRegister;
    __ reverseL(result, value, vscratch1, vscratch2, $rtmp$$Register, noreg);
  %}
  ins_pipe(ialu_reg);
%}
8801
8802 //---------- Population Count Instructions -------------------------------------
8803
// PopCountI, register source, when UsePopCountInstruction is on: popcntl.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (PopCountI src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ popcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
8815
// PopCountI with the LoadI folded into the popcntl memory operand.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  match(Set dst (PopCountI (LoadI mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ popcntl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8827
// PopCountL, register source, when UsePopCountInstruction is on: popcntq.
// Note: Long.bitCount(long) returns an int, hence the rRegI destination.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (PopCountL src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ popcntq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
8840
// PopCountL with the LoadL folded into the popcntq memory operand.
// Note: Long.bitCount(long) returns an int, hence the rRegI destination.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  match(Set dst (PopCountL (LoadL mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ popcntq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8853
8854
8855 //----------MemBar Instructions-----------------------------------------------
8856 // Memory barrier flavors
8857
// MemBarAcquire and LoadFence: matched at zero cost and zero size, with an
// empty encoding.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8869
// MemBarAcquireLock: matched at zero cost and zero size, with an empty
// encoding (see the format text for the rationale).
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8880
// MemBarRelease and StoreFence: matched at zero cost and zero size, with an
// empty encoding.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8892
// MemBarReleaseLock: matched at zero cost and zero size, with an empty
// encoding (see the format text for the rationale).
instruct membar_release_lock() %{
  match(MemBarReleaseLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8903
// MemBarStoreLoad: emitted through MacroAssembler::membar(StoreLoad); the
// flags register is declared killed.
instruct membar_storeload(rFlagsReg cr) %{
  match(MemBarStoreLoad);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    const Assembler::Membar_mask_bits barrier = Assembler::StoreLoad;
    __ membar(barrier);
  %}
  ins_pipe(pipe_slow);
%}
8918
// MemBarVolatile: emitted through MacroAssembler::membar(StoreLoad); the flags
// register is declared killed.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    const Assembler::Membar_mask_bits barrier = Assembler::StoreLoad;
    __ membar(barrier);
  %}
  ins_pipe(pipe_slow);
%}
8933
// MemBarVolatile for which Matcher::post_store_load_barrier(n) holds: matched
// at zero cost and zero size, with an empty encoding.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8945
// MemBarFull: emitted through MacroAssembler::membar(StoreLoad); the flags
// register is declared killed.
instruct membar_full(rFlagsReg cr) %{
  match(MemBarFull);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    const Assembler::Membar_mask_bits barrier = Assembler::StoreLoad;
    __ membar(barrier);
  %}
  ins_pipe(pipe_slow);
%}
8960
// MemBarStoreStore and StoreStoreFence: matched at zero cost and zero size,
// with an empty encoding.
instruct membar_storestore()
%{
  match(MemBarStoreStore);
  match(StoreStoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8971
8972 //----------Move Instructions--------------------------------------------------
8973
// CastX2P (long -> pointer): a register move that is omitted when source and
// destination were assigned the same register.
instruct castX2P(rRegP dst, rRegL src) %{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8986
// CastI2N (int -> narrow pointer): a 32-bit register move that is omitted when
// source and destination were assigned the same register.
// The format text names movl so that the printed assembly agrees with the
// instruction the encoding actually emits.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8999
// CastP2X with a narrow-pointer register as source: a register move that is
// omitted when source and destination were assigned the same register.
instruct castN2X(rRegL dst, rRegN src) %{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9012
// CastP2X (pointer -> long): a register move that is omitted when source and
// destination were assigned the same register.
instruct castP2X(rRegL dst, rRegP src) %{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    if (to != from) {
      __ movptr(to, from);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9025
// Convert oop into int for vectors alignment masking.
// Matches ConvL2I applied to CastP2X and emits one unconditional movl.
instruct convP2I(rRegI dst, rRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9037
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// Only selected when CompressedOops::shift() is zero; emits one movl.
instruct convN2I(rRegI dst, rRegN src) %{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(CompressedOops::shift() == 0);

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    const Register to   = $dst$$Register;
    const Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9051
// Convert oop pointer into compressed form.
// Selected when the node type is not TypePtr::NotNull. The value is first
// copied into dst (unless already there) and then encoded in place.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register wide   = $src$$Register;
    if (wide != narrow) {
      __ movq(narrow, wide);
    }
    __ encode_heap_oop(narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
9068
// EncodeP for nodes whose type is TypePtr::NotNull: uses the two-register
// encode_heap_oop_not_null form.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register wide   = $src$$Register;
    __ encode_heap_oop_not_null(narrow, wide);
  %}
  ins_pipe(ialu_reg_long);
%}
9079
// DecodeN for nodes whose pointer type is neither NotNull nor Constant. The
// value is first copied into dst (unless already there) and decoded in place.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    const Register wide   = $dst$$Register;
    const Register narrow = $src$$Register;
    if (narrow != wide) {
      __ movq(wide, narrow);
    }
    __ decode_heap_oop(wide);
  %}
  ins_pipe(ialu_reg_long);
%}
9096
// DecodeN for nodes whose pointer type is NotNull or Constant. Picks the
// in-place or the two-register decode_heap_oop_not_null form depending on
// whether source and destination share a register.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    const Register wide   = $dst$$Register;
    const Register narrow = $src$$Register;
    if (narrow == wide) {
      __ decode_heap_oop_not_null(wide);
    } else {
      __ decode_heap_oop_not_null(wide, narrow);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
9114
// EncodePKlass: delegates to encode_and_move_klass_not_null. dst is declared
// TEMP and the flags register is declared killed.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);

  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register narrow = $dst$$Register;
    const Register wide   = $src$$Register;
    __ encode_and_move_klass_not_null(narrow, wide);
  %}
  ins_pipe(ialu_reg_long);
%}
9124
// DecodeNKlass: delegates to decode_and_move_klass_not_null. dst is declared
// TEMP and the flags register is declared killed.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);

  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    const Register wide   = $dst$$Register;
    const Register narrow = $src$$Register;
    __ decode_and_move_klass_not_null(wide, narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
9134
9135 //----------Conditional Move---------------------------------------------------
9136 // Jump
9137 // dummy instruction for generating temp registers
// Jump through the constant table, indexed by (switch_val << shift).
// The predicate is the constant false, so this rule is never selected by the
// matcher as written.
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  predicate(false);
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register selector   = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    Address target(table_base, selector, (Address::ScaleFactor) $shift$$constant);
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
9158
// Jump through the constant table, indexed by (switch_val << shift) + offset.
// dest is a TEMP that receives the table base address.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register selector   = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    Address target(table_base, selector, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
9178
// Jump through the constant table, indexed directly by switch_val (scale 1).
// dest is a TEMP that receives the table base address.
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    const Register table_base = $dest$$Register;
    const Register selector   = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    Address target(table_base, selector, Address::times_1);
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
9198
// Conditional move, signed compare, int.
// Applies when n->in(2)->in(2) is the constant 0 and the other value is the
// immI_1 operand; emitted as one setb with the negated condition.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition matched = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9213
// CMoveI, signed compare, register source, two-operand cmovl (UseAPX off).
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9226
// CMoveI, signed compare, register sources, three-operand ecmovl (UseAPX on).
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9239
// CMoveI, unsigned compare. Applies when n->in(2)->in(2) is the constant 0 and
// the other value is the immI_1 operand; emitted as one setb with the negated
// condition.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9253
// CMoveI, unsigned compare, register source, two-operand cmovl (UseAPX off).
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9265
// CMoveI, unsigned compare, register sources, three-operand ecmovl (UseAPX on).
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9277
// CMoveI on rFlagsRegUCF / cmpOpUCF. Applies when n->in(2)->in(2) is the
// constant 0 and the other value is the immI_1 operand; emitted as one setb
// with the negated condition.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9291
// CMoveI on rFlagsRegUCFE / cmpOpUCFE. Applies when n->in(2)->in(2) is the
// constant 0 and the other value is the immI_1 operand; emitted as one setb
// with the negated condition.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    const Assembler::Condition matched = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9305
// CMoveI on rFlagsRegUCF / cmpOpUCF, register source: has no encoding of its
// own and expands to cmovI_regU.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
9314
// CMoveI on rFlagsRegUCFE / cmpOpUCFE, register sources, three-operand ecmovl.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9325
// CMoveI on cmpOpUCF2 when the Bool test is ne: two cmovl instructions, one on
// the parity condition and one on notEqual, both moving src into dst.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9339
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove.
// Same parity/notEqual cmovl pair as the ne rule, with the value operands
// swapped in the match rule; dst is additionally declared TEMP.
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9356
// Conditional move, signed compare, int, with the LoadI folded into the cmovl
// memory operand (UseAPX off).
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  predicate(!UseAPX);

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9369
// Conditional move, signed compare, int, three-operand ecmovl with the LoadI
// folded into the memory operand (UseAPX on).
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  predicate(UseAPX);

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9383
// Conditional move, unsigned compare, int, with the LoadI folded into the
// cmovl memory operand (UseAPX off).
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  predicate(!UseAPX);

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9397
// CMoveI on rFlagsRegUCF / cmpOpUCF with a LoadI source: has no encoding of
// its own and expands to cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
9406
// CMoveI, unsigned compare, three-operand ecmovl with the LoadI folded into
// the memory operand (UseAPX on).
instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  predicate(UseAPX);

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9419
// CMoveI on rFlagsRegUCFE / cmpOpUCFE, three-operand ecmovl with the LoadI
// folded into the memory operand.
instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9431
// Conditional move, signed compare, compressed pointer, two-operand cmovl
// (UseAPX off).
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9445
// Conditional move ndd, signed compare, compressed pointer, three-operand
// ecmovl (UseAPX on).
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9459
// Conditional move, unsigned compare, compressed pointer, two-operand cmovl
// (UseAPX off).
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9473
// CMoveN on rFlagsRegUCF / cmpOpUCF: has no encoding of its own and expands to
// cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
9482
// Conditional move ndd, unsigned compare, compressed pointer, three-operand
// ecmovl (UseAPX on).
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9496
// CMoveN on rFlagsRegUCFE / cmpOpUCFE, register sources, three-operand ecmovl.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9507
// CMoveN on cmpOpUCF2 when the Bool test is ne: two cmovl instructions, one on
// the parity condition and one on notEqual, both moving src into dst.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9521
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove.
// Same parity/notEqual cmovl pair as the ne rule, with the value operands
// swapped in the match rule.
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovl(Assembler::parity, target, source);
    __ cmovl(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9537
// Conditional move, signed compare, pointer, two-operand cmovq (UseAPX off).
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9551
// Conditional move ndd, signed compare, pointer, three-operand ecmovq
// (UseAPX on).
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9565
// Conditional move, unsigned compare, pointer, two-operand cmovq (UseAPX off).
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9579
// Conditional move ndd, unsigned compare, pointer, three-operand ecmovq
// (UseAPX on).
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9593
// CMoveP on rFlagsRegUCF / cmpOpUCF: has no encoding of its own and expands to
// cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
9602
// CMoveP on rFlagsRegUCFE / cmpOpUCFE, register sources, three-operand ecmovq.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9613
// CMoveP on cmpOpUCF2 when the Bool test is ne: two cmovq instructions, one on
// the parity condition and one on notEqual, both moving src into dst.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9627
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove.
// Same parity/notEqual cmovq pair as the ne rule, with the value operands
// swapped in the match rule.
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9643
// CMoveL, signed compare. Applies when n->in(2)->in(2) is the long constant 0
// and the other value is the immL1 operand; emitted as one setb with the
// negated condition.
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition matched = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition negated = MacroAssembler::negate_condition(matched);
    __ setb(negated, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9657
// CMoveL, signed compare, register source, two-operand cmovq (UseAPX off).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9670
// CMoveL, signed compare, register sources, three-operand ecmovq (UseAPX on).
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9683
// Long conditional move whose second input is a LoadL folded into the
// instruction as a memory operand.  Signed flags, non-APX only.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9696
// Long conditional move, APX three-operand form with the second input being a
// LoadL folded in as a memory operand.  Signed flags.
instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9709
// Unsigned-flags counterpart of the 0/1 long CMove: src is the constant 1 and
// the predicate requires the other input to be the constant 0.  Encoded as
// setb on the negated condition.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop) %{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition taken = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition not_taken = MacroAssembler::negate_condition(taken);
    __ setb(not_taken, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9723
// Long conditional move, register source, unsigned flags.  Two-operand form,
// selected only when APX is not in use.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9736
// Long conditional move, register sources, unsigned flags.  APX three-operand
// ecmovq form.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9749
// 0/1 long CMove on rFlagsRegUCF / cmpOpUCF: src is the constant 1 and the
// predicate requires the other input to be the constant 0.  Encoded as setb
// on the negated condition.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop) %{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition taken = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition not_taken = MacroAssembler::negate_condition(taken);
    __ setb(not_taken, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9763
// 0/1 long CMove on rFlagsRegUCFE / cmpOpUCFE: src is the constant 1 and the
// predicate requires the other input to be the constant 0.  Encoded as setb
// on the negated condition.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop) %{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    const Assembler::Condition taken = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Assembler::Condition not_taken = MacroAssembler::negate_condition(taken);
    __ setb(not_taken, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9777
// Long conditional move on cmpOpUCF / rFlagsRegUCF.  Has no encoding of its
// own: it expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
9786
// Long conditional move on cmpOpUCFE / rFlagsRegUCFE, register sources,
// three-operand ecmovq encoding.  This rule carries no predicate.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9798
// Long conditional move for a BoolTest::ne test on cmpOpUCF2 flags.
// Encoded as two cmovq instructions: one on parity, one on notEqual.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ cmovq(Assembler::parity, dst_reg, src_reg);
    __ cmovq(Assembler::notEqual, dst_reg, src_reg);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9812
// Long conditional move for a BoolTest::eq test on cmpOpUCF2 flags.
// Since (x == y) == !(x != y), the sense of the test is flipped by swapping
// the two CMove inputs in the match rule (note "src dst" below); the encoding
// is the same parity / notEqual cmov pair used by cmovL_regUCF2_ne.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src)
%{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ cmovq(Assembler::parity, dst_reg, src_reg);
    __ cmovq(Assembler::notEqual, dst_reg, src_reg);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9828
// Long conditional move whose second input is a LoadL folded in as a memory
// operand.  Unsigned flags, non-APX only.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9841
// Long conditional move from memory on cmpOpUCF / rFlagsRegUCF.  Has no
// encoding of its own: it expands to cmovL_memU with the same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
9850
// Long conditional move, APX three-operand form with the second input being a
// LoadL folded in as a memory operand.  Unsigned flags.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9863
// Long conditional move on cmpOpUCFE / rFlagsRegUCFE with the second input
// being a LoadL folded in as a memory operand.  Three-operand ecmovq
// encoding; this rule carries no predicate.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9875
// Float conditional move on signed flags.  Emitted as a short branch on the
// inverted condition that skips over a register-to-register movflt.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9893
// Float conditional move on unsigned flags.  Emitted as a short branch on the
// inverted condition that skips over a register-to-register movflt.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9911
// Float conditional move on cmpOpUCF / rFlagsRegUCF.  Has no encoding of its
// own: it expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
9920
// Float conditional move on cmpOpUCFE / rFlagsRegUCFE.  Emitted as a short
// branch on the inverted condition that skips over a movflt.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9938
// Double conditional move on signed flags.  Emitted as a short branch on the
// inverted condition that skips over a register-to-register movdbl.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9956
// Double conditional move on unsigned flags.  Emitted as a short branch on
// the inverted condition that skips over a register-to-register movdbl.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
9974
// Double conditional move on cmpOpUCF / rFlagsRegUCF.  Has no encoding of its
// own: it expands to cmovD_regU with the same operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
9983
// Double conditional move on cmpOpUCFE / rFlagsRegUCFE.  Emitted as a short
// branch on the inverted condition that skips over a movdbl.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // The branch condition is the CMOV condition with its low bit flipped.
    const Assembler::Condition inverted = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label done;
    __ jccb(inverted, done);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10001
10002 //----------Arithmetic Instructions--------------------------------------------
10003 //----------Addition Instructions----------------------------------------------
10004
// Int add, register + register, two-operand form (dst is also the first
// input).  Non-APX only.  Kills the flags register and is tagged as setting
// the overflow, sign, zero, carry and parity flags.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register addend = $src$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10017
// Int add, register + register, APX three-operand form (separate dst).
// Kills the flags register; tagged with the arithmetic flag set plus both
// ndd_demotable operand flags.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eaddl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10031
// Int add, register + immediate, two-operand form.  Non-APX only.  Kills the
// flags register and is tagged as setting the arithmetic flags.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10045
// Int add, register + immediate, APX three-operand form.  Kills the flags
// register; tagged with the arithmetic flag set plus ndd_demotable_opr1.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddl(result, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10059
// Int add of a loaded memory value and an immediate into a register, APX
// three-operand form.  The LoadI is folded into the instruction.  Kills the
// flags register and is tagged as setting the arithmetic flags.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddl(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10073
// Int add, register + loaded memory value, two-operand form with the LoadI
// folded in.  Non-APX only.  Kills the flags register and is tagged as
// setting the arithmetic flags.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addl(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10088
// Int add, register + loaded memory value, APX three-operand form with the
// LoadI folded in.  Kills the flags register; tagged with the arithmetic flag
// set plus both ndd_demotable operand flags.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddl(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10103
// Int read-modify-write add: load from dst, add a register, store back to
// the same address, matched as one memory-destination addl.  Kills the flags
// register and is tagged as setting the arithmetic flags.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register addend = $src$$Register;
    __ addl($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10117
// Int read-modify-write add of an immediate: load from dst, add the constant,
// store back to the same address, matched as one memory-destination addl.
// Kills the flags register and is tagged as setting the arithmetic flags.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10132
// Int increment of a register: AddI with the constant 1, emitted through
// incrementl.  Selected only when UseIncDec is set and APX is not in use.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10145
// Int increment, APX form with a separate destination: dst = src + 1 emitted
// as eincl.  Requires both UseAPX and UseIncDec.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ eincl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10159
// Int increment of a loaded memory value into a register, APX form: the LoadI
// is folded into eincl.  Requires both UseAPX and UseIncDec.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eincl(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10172
// Int read-modify-write increment: load from dst, add 1, store back to the
// same address, emitted through incrementl on the address.  Requires
// UseIncDec.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    // Memory-destination form.
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10186
// Int decrement of a register: AddI with the constant -1, emitted through
// decrementl.  Selected only when UseIncDec is set and APX is not in use.
// XXX why does that use AddI
// NOTE(review): presumably the ideal graph represents "x - 1" as an add of
// -1, which is why the rule matches AddI -- confirm.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10200
// Int decrement, APX form with a separate destination: dst = src + (-1)
// emitted as edecl.  Requires both UseAPX and UseIncDec.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ edecl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10214
// Int decrement of a loaded memory value into a register, APX form: the LoadI
// is folded into edecl.  Requires both UseAPX and UseIncDec.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ edecl(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10227
// Int read-modify-write decrement: load from dst, add -1, store back to the
// same address, emitted through decrementl on the address.  Requires
// UseIncDec.
// XXX why does that use AddI
// NOTE(review): presumably the ideal graph represents "x - 1" as an add of
// -1, which is why the rule matches AddI -- confirm.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    // Memory-destination form.
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10242
// Int (index << scale) + disp folded into a single leal with no base
// register.  Used only when VM_Version::supports_fast_2op_lea() holds.
// No flags operand is declared, so this rule has no KILL effect.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10255
// Int (base + index) + disp folded into a single leal with scale times_1.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    const Register base_reg = $base$$Register;
    const Register index_reg = $index$$Register;
    __ leal($dst$$Register, Address(base_reg, index_reg, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10267
// Int base + (index << scale) folded into a single leal without a
// displacement.  The base operand uses the no_rbp_r13_RegI class.  Used only
// when VM_Version::supports_fast_2op_lea() holds.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10280
// Int (base + (index << scale)) + disp folded into a single leal using all
// address components.  Used only when VM_Version::supports_fast_3op_lea()
// holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10293
// Long add, register + register, two-operand form (dst is also the first
// input).  Non-APX only.  Kills the flags register and is tagged as setting
// the overflow, sign, zero, carry and parity flags.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register addend = $src$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10307
// Long add, register + register, APX three-operand form (separate dst).
// Kills the flags register; tagged with the arithmetic flag set plus both
// ndd_demotable operand flags.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    __ eaddq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10321
// Long add, register + immL32 immediate, two-operand form.  Non-APX only.
// Kills the flags register and is tagged as setting the arithmetic flags.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10335
// Long add, register + immL32 immediate, APX three-operand form.  Kills the
// flags register; tagged with the arithmetic flag set plus ndd_demotable_opr1.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10349
// Long add of a loaded memory value and an immL32 immediate into a register,
// APX three-operand form with the LoadL folded in.  Kills the flags register
// and is tagged as setting the arithmetic flags.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eaddq(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10363
// Long add, register + loaded memory value, two-operand form with the LoadL
// folded in.  Non-APX only.  Kills the flags register and is tagged as
// setting the arithmetic flags.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10378
// Long add, register + loaded memory value, APX three-operand form with the
// LoadL folded in.  Kills the flags register; tagged with the arithmetic flag
// set plus both ndd_demotable operand flags.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddq(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10393
// Long read-modify-write add: load from dst, add a register, store back to
// the same address, matched as one memory-destination addq.  Kills the flags
// register and is tagged as setting the arithmetic flags.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register addend = $src$$Register;
    __ addq($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10407
// Long read-modify-write add of an immL32 immediate: load from dst, add the
// constant, store back to the same address, matched as one memory-destination
// addq.  Kills the flags register and is tagged as setting the arithmetic
// flags.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10421
// Long increment of a register: AddL with the constant 1, emitted through
// incrementq.  Selected only when UseIncDec is set and APX is not in use.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10434
// Long increment, APX form with a separate destination: dst = src + 1 emitted
// as eincq.  Requires both UseAPX and UseIncDec.
//
// The source operand is declared rRegL.  The rule matches AddL, whose input
// is a long value, and eincq is the 64-bit form; this keeps the operand class
// consistent with the other long rules (for example decL_rReg_ndd) instead of
// the int register class rRegI.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10448
// Long increment of a loaded memory value into a register, APX form: the
// LoadL is folded into eincq.  Requires both UseAPX and UseIncDec.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eincq(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10461
// Long read-modify-write increment: load from dst, add 1, store back to the
// same address, emitted through incrementq on the address.  Requires
// UseIncDec.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    // Memory-destination form.
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10475
// Long decrement of a register: AddL with the constant -1, emitted through
// decrementq.  Selected only when UseIncDec is set and APX is not in use.
// XXX why does that use AddL
// NOTE(review): presumably the ideal graph represents "x - 1" as an add of
// -1, which is why the rule matches AddL -- confirm.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10489
// Long decrement, APX form with a separate destination: dst = src + (-1)
// emitted as edecq.  Requires both UseAPX and UseIncDec.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10503
// Long decrement of a loaded memory value into a register, APX form: the
// LoadL is folded into edecq.  Requires both UseAPX and UseIncDec.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ edecq(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10516
// Long read-modify-write decrement: load from dst, add -1, store back to the
// same address, emitted through decrementq on the address.  Requires
// UseIncDec.
// XXX why does that use AddL
// NOTE(review): presumably the ideal graph represents "x - 1" as an add of
// -1, which is why the rule matches AddL -- confirm.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    // Memory-destination form.
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10531
// Long (index << scale) + disp folded into a single leaq with no base
// register.  Used only when VM_Version::supports_fast_2op_lea() holds.
// No flags operand is declared, so this rule has no KILL effect.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10544
// Long (base + index) + disp folded into a single leaq with scale times_1.
// Used only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Register base_reg = $base$$Register;
    const Register index_reg = $index$$Register;
    __ leaq($dst$$Register, Address(base_reg, index_reg, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10556
// Long base + (index << scale) folded into a single leaq without a
// displacement.  The base operand uses the no_rbp_r13_RegL class.  Used only
// when VM_Version::supports_fast_2op_lea() holds.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor));
  %}
  ins_pipe(ialu_reg_reg);
%}
10569
// Long (base + (index << scale)) + disp folded into a single leaq using all
// address components.  Used only when VM_Version::supports_fast_3op_lea()
// holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count is reinterpreted directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, factor, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10582
// Pointer add: pointer register plus a long register offset, emitted as a
// two-operand addq.  This rule carries no predicate.  Kills the flags
// register and is tagged as setting the arithmetic flags.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    const Register offset = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10595
// Pointer add: pointer register plus an immL32 immediate offset, emitted as
// a two-operand addq.  This rule carries no predicate.  Kills the flags
// register and is tagged as setting the arithmetic flags.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10608
10609 // XXX addP mem ops ????
10610
// CheckCastPP on a pointer register.  Declared with size 0 and an empty
// encoding, so no machine code is emitted for it.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10620
// CastPP on a pointer register.  Declared with size 0 and an empty encoding,
// so no machine code is emitted for it.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10630
// CastII when constraint-cast verification is disabled
// (VerifyConstraintCasts == 0): zero size, zero cost, empty encoding.
instruct castII(rRegI dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  ins_cost(0);
  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10642
// CastII when VerifyConstraintCasts > 0: passes the node index, the node's
// int bottom type and the value register to verify_int_in_range.
// The flags register is declared killed.
instruct castII_checked(rRegI dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));
  effect(KILL cr);

  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const Register val_reg = $dst$$Register;
    __ verify_int_in_range(_idx, bottom_type()->is_int(), val_reg);
  %}
  ins_pipe(pipe_slow);
%}
10655
// CastLL when constraint-cast verification is disabled
// (VerifyConstraintCasts == 0): zero size, zero cost, empty encoding.
instruct castLL(rRegL dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  ins_cost(0);
  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10667
// Verified CastLL for nodes where castLL_is_imm32(n) holds. No temporary
// register is reserved; noreg is handed to verify_long_in_range instead.
// The flags register is declared killed.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr);

  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const Register val_reg = $dst$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), val_reg, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10680
// Verified CastLL for nodes where castLL_is_imm32(n) does not hold. A TEMP
// long register is reserved and passed to verify_long_in_range.
// The flags register is declared killed.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr, TEMP tmp);

  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const Register val_reg     = $dst$$Register;
    const Register scratch_reg = $tmp$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), val_reg, scratch_reg);
  %}
  ins_pipe(pipe_slow);
%}
10693
// CastFF on a float register: zero size, zero cost, empty encoding.
instruct castFF(regF dst) %{
  match(Set dst (CastFF dst));

  ins_cost(0);
  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10704
// CastHH, using the regF operand class: zero size, zero cost, empty encoding.
instruct castHH(regF dst) %{
  match(Set dst (CastHH dst));

  ins_cost(0);
  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10715
// CastDD on a double register: zero size, zero cost, empty encoding.
instruct castDD(regD dst) %{
  match(Set dst (CastDD dst));

  ins_cost(0);
  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(empty);
%}
10726
10727 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapP and WeakCompareAndSwapP, and only applies when
// the node's barrier_data() is 0. The expected value is constrained to the
// rax_RegP operand class and is declared killed along with the flags.
instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10748
// Long compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapL and WeakCompareAndSwapL. The expected value is
// constrained to the rax_RegL operand class and is declared killed along with
// the flags.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10768
// Int compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapI and WeakCompareAndSwapI. The expected value is
// constrained to the rax_RegI operand class and is declared killed along with
// the flags.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10788
// Byte compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapB and WeakCompareAndSwapB. The expected value is
// constrained to the rax_RegI operand class and is declared killed along with
// the flags.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgb(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10808
// Short compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapS and WeakCompareAndSwapS. The expected value is
// constrained to the rax_RegI operand class and is declared killed along with
// the flags.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgw(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10828
// Narrow-oop compare-and-swap that yields a success indicator in $res.
// Covers both CompareAndSwapN and WeakCompareAndSwapN, and only applies when
// the node's barrier_data() is 0. Uses the 32-bit cmpxchgl. The expected
// value is constrained to rax_RegN and declared killed along with the flags.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    const Register res_reg = $res$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
    // Turn the "equal" condition left by cmpxchg into the integer result.
    __ setcc(Assembler::equal, res_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10848
// Byte compare-and-exchange. The result is defined in $oldval (rax_RegI
// operand class), the same operand that supplies the expected value.
// Flags are declared killed.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgb(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10865
// Short compare-and-exchange. The result is defined in $oldval (rax_RegI
// operand class), the same operand that supplies the expected value.
// Flags are declared killed.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgw(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10882
// Int compare-and-exchange. The result is defined in $oldval (rax_RegI
// operand class), the same operand that supplies the expected value.
// Flags are declared killed.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10899
// Long compare-and-exchange. The result is defined in $oldval (rax_RegL
// operand class), the same operand that supplies the expected value.
// Flags are declared killed.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10916
// Narrow-oop compare-and-exchange, applied only when barrier_data() is 0.
// The result is defined in $oldval (rax_RegN operand class), the same operand
// that supplies the expected value. Flags are declared killed.
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgl(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10933
// Pointer compare-and-exchange, applied only when barrier_data() is 0.
// The result is defined in $oldval (rax_RegP operand class), the same operand
// that supplies the expected value. Flags are declared killed.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register new_reg = $newval$$Register;
    __ lock();
    __ cmpxchgq(new_reg, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10951
// GetAndAddB with a register addend when result_not_used() holds: emits a
// lock-prefixed addb to memory rather than an xadd. Flags are declared killed.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    const Register addend_reg = $add$$Register;
    __ lock();
    __ addb($mem$$Address, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10963
// GetAndAddB with an immI addend when result_not_used() holds: emits a
// lock-prefixed addb of the constant to memory. Flags are declared killed.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    // Prefix first, then the memory-destination add it applies to.
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10975
// GetAndAddB when the result is used: lock-prefixed xaddb, with $newval
// serving as both the addend and the result register. The result register is
// then passed to narrow_subword_type with T_BYTE. Flags are declared killed.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);

  format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ lock();
    __ xaddb($mem$$Address, val_reg);
    __ narrow_subword_type(val_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10988
// GetAndAddS with a register addend when result_not_used() holds: emits a
// lock-prefixed addw to memory rather than an xadd. Flags are declared killed.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    const Register addend_reg = $add$$Register;
    __ lock();
    __ addw($mem$$Address, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11000
// GetAndAddS with an immI addend when result_not_used() holds. Additionally
// gated on UseStoreImmI16. Emits a lock-prefixed addw of the constant to
// memory. Flags are declared killed.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    // Prefix first, then the memory-destination add it applies to.
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11012
// GetAndAddS when the result is used: lock-prefixed xaddw, with $newval
// serving as both the addend and the result register. The result register is
// then passed to narrow_subword_type with T_SHORT. Flags are declared killed.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);

  format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ lock();
    __ xaddw($mem$$Address, val_reg);
    __ narrow_subword_type(val_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11025
// GetAndAddI with a register addend when result_not_used() holds: emits a
// lock-prefixed addl to memory rather than an xadd. Flags are declared killed.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    const Register addend_reg = $add$$Register;
    __ lock();
    __ addl($mem$$Address, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11037
// GetAndAddI with an immI addend when result_not_used() holds: emits a
// lock-prefixed addl of the constant to memory. Flags are declared killed.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    // Prefix first, then the memory-destination add it applies to.
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11049
// GetAndAddI when the result is used: lock-prefixed xaddl, with $newval
// serving as both the addend and the result register.
// Flags are declared killed.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);

  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ lock();
    __ xaddl($mem$$Address, val_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11061
// GetAndAddL with a register addend when result_not_used() holds: emits a
// lock-prefixed addq to memory rather than an xadd. Flags are declared killed.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    const Register addend_reg = $add$$Register;
    __ lock();
    __ addq($mem$$Address, addend_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11073
// GetAndAddL with an immL32 addend when result_not_used() holds: emits a
// lock-prefixed addq of the constant to memory. Flags are declared killed.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    // Prefix first, then the memory-destination add it applies to.
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11085
// GetAndAddL when the result is used: lock-prefixed xaddq, with $newval
// serving as both the addend and the result register.
// Flags are declared killed.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);

  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ lock();
    __ xaddq($mem$$Address, val_reg);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11097
// GetAndSetB: xchgb between $newval and memory, after which the register is
// passed to narrow_subword_type with T_BYTE. No lock() call is emitted here
// and no flags effect is declared.
instruct xchgB(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetB mem newval));

  format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ xchgb(val_reg, $mem$$Address);
    __ narrow_subword_type(val_reg, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11107
// GetAndSetS: xchgw between $newval and memory, after which the register is
// passed to narrow_subword_type with T_SHORT. No lock() call is emitted here
// and no flags effect is declared.
instruct xchgS(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetS mem newval));

  format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ xchgw(val_reg, $mem$$Address);
    __ narrow_subword_type(val_reg, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11117
// GetAndSetI: 32-bit xchgl between $newval and memory. No lock() call is
// emitted here and no flags effect is declared.
instruct xchgI(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetI mem newval));

  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ xchgl(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11126
// GetAndSetL: 64-bit xchgq between $newval and memory. No lock() call is
// emitted here and no flags effect is declared.
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  // The mnemonic printed here must match the xchgq actually emitted below
  // (the 32-bit "XCHGL" spelling belongs to the int/narrow-oop rules).
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11135
// GetAndSetP, applied only when barrier_data() is 0: 64-bit xchgq between
// $newval and memory. No lock() call is emitted here and no flags effect is
// declared.
instruct xchgP(memory mem, rRegP newval)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));

  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    const Register val_reg = $newval$$Register;
    __ xchgq(val_reg, $mem$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11145
// GetAndSetN, applied only when barrier_data() is 0: 32-bit xchgl between
// $newval and memory. No lock() call is emitted here and no flags effect is
// declared.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  // Memory operand is printed in brackets, as in the other xchg rules.
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11155
11156 //----------Abs Instructions-------------------------------------------
11157
11158 // Integer Absolute Instructions
// AbsI via negate-and-select: dst = 0 - src, then a cmov on the "less"
// condition replaces dst with src. $dst is a TEMP (written before $src is
// last read) and the flags register is declared killed.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ xorl(out_reg, out_reg);                  // out = 0
    __ subl(out_reg, in_reg);                   // out = -in, flags from 0 - in
    __ cmovl(Assembler::less, out_reg, in_reg); // select in on "less"
  %}
  ins_pipe(ialu_reg_reg);
%}
11174
11175 // Long Absolute Instructions
// AbsL via negate-and-select: dst is cleared with xorl, dst = 0 - src with
// subq, then a cmovq on the "less" condition replaces dst with src. $dst is a
// TEMP and the flags register is declared killed.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);

  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ xorl(out_reg, out_reg);                  // clear out
    __ subq(out_reg, in_reg);                   // out = -in, flags from 0 - in
    __ cmovq(Assembler::less, out_reg, in_reg); // select in on "less"
  %}
  ins_pipe(ialu_reg_reg);
%}
11191
11192 //----------Subtraction Instructions-------------------------------------------
11193
11194 // Integer Subtraction Instructions
// Int subtract, two-operand form (dst -= src); selected when UseAPX is off.
// Flags are declared killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc_reg = $dst$$Register;
    const Register rhs_reg = $src$$Register;
    __ subl(acc_reg, rhs_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11208
// Int subtract, three-operand (ndd) form dst = src1 - src2; selected when
// UseAPX is on. Flags are declared killed; operand 1 is flagged ndd-demotable.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ esubl(out_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11222
// Int subtract of an immI constant, three-operand (ndd) form
// dst = src1 - src2; selected when UseAPX is on. Flags are declared killed;
// operand 1 is flagged ndd-demotable.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ esubl(out_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11236
// Int subtract of an immI constant from a loaded value, three-operand (ndd)
// form dst = [src1] - src2; selected when UseAPX is on.
// Flags are declared killed.
instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    __ esubl(out_reg, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11250
// Int subtract of a loaded value, two-operand form (dst -= [src]); selected
// when UseAPX is off. Flags are declared killed.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    const Register acc_reg = $dst$$Register;
    __ subl(acc_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11265
// Int subtract of a loaded value, three-operand (ndd) form
// dst = src1 - [src2]; selected when UseAPX is on. Flags are declared killed;
// operand 1 is flagged ndd-demotable.
instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
  ins_cost(150);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ esubl(out_reg, lhs_reg, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11280
// Int subtract with the minuend loaded from memory, three-operand (ndd) form
// dst = [src1] - src2; selected when UseAPX is on. Flags are declared killed.
instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register rhs_reg = $src2$$Register;
    __ esubl(out_reg, $src1$$Address, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11295
// Int read-modify-write subtract: matches a StoreI of (LoadI dst) - src back
// to the same memory operand and emits a single subl with a memory
// destination. Flags are declared killed.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    const Register rhs_reg = $src$$Register;
    __ subl($dst$$Address, rhs_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11309
// Long subtract, two-operand form (dst -= src); selected when UseAPX is off.
// Flags are declared killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc_reg = $dst$$Register;
    const Register rhs_reg = $src$$Register;
    __ subq(acc_reg, rhs_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11323
// Long subtract, three-operand (ndd) form dst = src1 - src2; selected when
// UseAPX is on. Flags are declared killed; operand 1 is flagged ndd-demotable.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ esubq(out_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11337
// Long subtract of an immL32 constant, three-operand (ndd) form
// dst = src1 - src2; selected when UseAPX is on. Flags are declared killed;
// operand 1 is flagged ndd-demotable.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ esubq(out_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11351
// Long subtract of an immL32 constant from a loaded value, three-operand
// (ndd) form dst = [src1] - src2; selected when UseAPX is on.
// Flags are declared killed.
instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    __ esubq(out_reg, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11365
// Long subtract of a loaded value, two-operand form (dst -= [src]); selected
// when UseAPX is off. Flags are declared killed.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    const Register acc_reg = $dst$$Register;
    __ subq(acc_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11380
// Long subtract of a loaded value, three-operand (ndd) form
// dst = src1 - [src2]; selected when UseAPX is on. Flags are declared killed;
// operand 1 is flagged ndd-demotable.
instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
  ins_cost(150);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    __ esubq(out_reg, lhs_reg, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11395
// Long subtract with the minuend loaded from memory, three-operand (ndd) form
// dst = [src1] - src2; selected when UseAPX is on. Flags are declared killed.
instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register rhs_reg = $src2$$Register;
    __ esubq(out_reg, $src1$$Address, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
11410
// Long read-modify-write subtract: matches a StoreL of (LoadL dst) - src back
// to the same memory operand and emits a single subq with a memory
// destination. Flags are declared killed.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    const Register rhs_reg = $src$$Register;
    __ subq($dst$$Address, rhs_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
11424
11425 // Subtract from a pointer
11426 // XXX hmpf???
// Matches AddP dst (SubI 0 src), i.e. a pointer plus a negated int, and emits
// a single subq dst, src. Flags are declared killed.
// NOTE(review): $src is declared rRegI but is consumed by the 64-bit subq;
// confirm the upper half of the register is well-defined wherever this rule
// can match.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    const Register ptr_reg = $dst$$Register;
    const Register off_reg = $src$$Register;
    __ subq(ptr_reg, off_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11438
// Int negate expressed as (SubI 0 dst), done in place with negl; selected
// when UseAPX is off. Flags are declared killed.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    const Register val_reg = $dst$$Register;
    __ negl(val_reg);
  %}
  ins_pipe(ialu_reg);
%}
11452
// Int negate expressed as (SubI 0 src), two-register (ndd) form
// dst = -src; selected when UseAPX is on. Flags are declared killed;
// operand 2 is flagged ndd-demotable.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ enegl(out_reg, in_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11466
// Int negate expressed as a NegI node, done in place with negl; selected when
// UseAPX is off. Flags are declared killed.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    const Register val_reg = $dst$$Register;
    __ negl(val_reg);
  %}
  ins_pipe(ialu_reg);
%}
11480
// Int negate expressed as a NegI node, two-register (ndd) form dst = -src;
// selected when UseAPX is on. Flags are declared killed; operand 1 is flagged
// ndd-demotable.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ enegl(out_reg, in_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11494
// Int read-modify-write negate: matches a StoreI of 0 - (LoadI dst) back to
// the same memory operand and emits negl with a memory operand.
// Flags are declared killed.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    // Single instruction: load, negate and store happen on the memory operand.
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11507
// Long negate expressed as (SubL 0 dst), done in place with negq; selected
// when UseAPX is off. Flags are declared killed.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    const Register val_reg = $dst$$Register;
    __ negq(val_reg);
  %}
  ins_pipe(ialu_reg);
%}
11521
// Long negate expressed as (SubL 0 src), two-register (ndd) form dst = -src;
// selected when UseAPX is on. Flags are declared killed; operand 2 is flagged
// ndd-demotable.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ enegq(out_reg, in_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11535
// Long negate expressed as a NegL node, done in place with negq; selected
// when UseAPX is off. Flags are declared killed.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  // Operand is a long (rRegL / negq), so the annotation says "long" like the
  // other long negate rules.
  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11549
// Long negate expressed as a NegL node, two-register (ndd) form dst = -src;
// selected when UseAPX is on. Flags are declared killed; operand 1 is flagged
// ndd-demotable.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register in_reg  = $src$$Register;
    __ enegq(out_reg, in_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
11563
// Long read-modify-write negate: matches a StoreL of 0 - (LoadL dst) back to
// the same memory operand and emits negq with a memory operand.
// Flags are declared killed.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    // Single instruction: load, negate and store happen on the memory operand.
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11576
11577 //----------Multiplication/Division Instructions-------------------------------
11578 // Integer Multiplication Instructions
11579 // Multiply Register
11580
// Int multiply, two-operand form (dst *= src); selected when UseAPX is off.
// Flags are declared killed.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);
  ins_cost(300);

  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    const Register acc_reg = $dst$$Register;
    const Register rhs_reg = $src$$Register;
    __ imull(acc_reg, rhs_reg);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11594
// Int multiply, three-operand (ndd) form dst = src1 * src2; selected when
// UseAPX is on. Flags are declared killed; both register sources are flagged
// ndd-demotable.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(300);

  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register out_reg = $dst$$Register;
    const Register lhs_reg = $src1$$Register;
    const Register rhs_reg = $src2$$Register;
    __ eimull(out_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11609
// Int multiply of a register by an immI constant into a separate dst:
// emits the three-operand imull dst, src, imm. There is no UseAPX predicate,
// so this rule is available in both modes.
11610 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11611 %{
11612 match(Set dst (MulI src imm));
11613 effect(KILL cr);
11614
11615 ins_cost(300);
11616 format %{ "imull $dst, $src, $imm\t# int" %}
11617 ins_encode %{
11618 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11619 %}
11620 ins_pipe(ialu_reg_reg_alu0);
11621 %}
11622
// Int multiply of dst by an int loaded from memory (dst = dst * [src]).
// Non-APX only; emits imull with an address operand. Its cost (350) is set
// above the 300 used by the register-register form.
11623 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11624 %{
11625 predicate(!UseAPX);
11626 match(Set dst (MulI dst (LoadI src)));
11627 effect(KILL cr);
11628
11629 ins_cost(350);
11630 format %{ "imull $dst, $src\t# int" %}
11631 ins_encode %{
11632 __ imull($dst$$Register, $src$$Address);
11633 %}
11634 ins_pipe(ialu_reg_mem_alu0);
11635 %}
11636
// APX form of int multiply with a register source and a memory source
// (dst = src1 * [src2]): emits eimull(dst, src1, address, false).
// NOTE(review): Flag_ndd_demotable_opr2 is set although src2 is a memory
// operand rather than a register - confirm that this flag is intended here.
11637 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11638 %{
11639 predicate(UseAPX);
11640 match(Set dst (MulI src1 (LoadI src2)));
11641 effect(KILL cr);
11642 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11643
11644 ins_cost(350);
11645 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11646 ins_encode %{
11647 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11648 %}
11649 ins_pipe(ialu_reg_mem_alu0);
11650 %}
11651
// Int multiply of a value loaded from memory by an immI constant
// (dst = [src] * imm): emits the three-operand imull with an address source.
// No UseAPX predicate is attached.
11652 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11653 %{
11654 match(Set dst (MulI (LoadI src) imm));
11655 effect(KILL cr);
11656
11657 ins_cost(300);
11658 format %{ "imull $dst, $src, $imm\t# int" %}
11659 ins_encode %{
11660 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11661 %}
11662 ins_pipe(ialu_reg_mem_alu0);
11663 %}
11664
// MulAddS2I over (dst, src1) and (src2, src3). It has no encoding of its own:
// it expands into mulI_rReg(dst, src1), mulI_rReg(src2, src3) and then
// addI_rReg(dst, src2) (addI_rReg is defined outside this section).
// src2 receives the second product, which is why it is listed as KILL.
11665 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11666 %{
11667 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11668 effect(KILL cr, KILL src2);
11669
11670 expand %{ mulI_rReg(dst, src1, cr);
11671 mulI_rReg(src2, src3, cr);
11672 addI_rReg(dst, src2, cr); %}
11673 %}
11674
// Long multiply, register by register, with dst as both left input and result
// (dst = dst * src). Non-APX only; emits the two-operand imulq and kills flags.
11675 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11676 %{
11677 predicate(!UseAPX);
11678 match(Set dst (MulL dst src));
11679 effect(KILL cr);
11680
11681 ins_cost(300);
11682 format %{ "imulq $dst, $src\t# long" %}
11683 ins_encode %{
11684 __ imulq($dst$$Register, $src$$Register);
11685 %}
11686 ins_pipe(ialu_reg_reg_alu0);
11687 %}
11688
// APX form of long multiply with two independent sources (dst = src1 * src2):
// emits eimulq(dst, src1, src2, false). Both source operands are tagged with
// an ndd_demotable flag.
// NOTE(review): the trailing `false` is presumably the no_flags argument - confirm.
11689 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11690 %{
11691 predicate(UseAPX);
11692 match(Set dst (MulL src1 src2));
11693 effect(KILL cr);
11694 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11695
11696 ins_cost(300);
11697 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11698 ins_encode %{
11699 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11700 %}
11701 ins_pipe(ialu_reg_reg_alu0);
11702 %}
11703
// Long multiply of a register by an immL32 constant into a separate dst:
// emits the three-operand imulq dst, src, imm. No UseAPX predicate is attached.
11704 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11705 %{
11706 match(Set dst (MulL src imm));
11707 effect(KILL cr);
11708
11709 ins_cost(300);
11710 format %{ "imulq $dst, $src, $imm\t# long" %}
11711 ins_encode %{
11712 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11713 %}
11714 ins_pipe(ialu_reg_reg_alu0);
11715 %}
11716
// Long multiply of dst by a long loaded from memory (dst = dst * [src]).
// Non-APX only; emits imulq with an address operand at cost 350.
11717 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11718 %{
11719 predicate(!UseAPX);
11720 match(Set dst (MulL dst (LoadL src)));
11721 effect(KILL cr);
11722
11723 ins_cost(350);
11724 format %{ "imulq $dst, $src\t# long" %}
11725 ins_encode %{
11726 __ imulq($dst$$Register, $src$$Address);
11727 %}
11728 ins_pipe(ialu_reg_mem_alu0);
11729 %}
11730
// APX form of long multiply with a register source and a memory source
// (dst = src1 * [src2]): emits eimulq(dst, src1, address, false).
// NOTE(review): Flag_ndd_demotable_opr2 is set although src2 is a memory
// operand - confirm that this flag is intended here.
// NOTE(review): unlike the other *_ndd rules, the format string lacks the
// "ndd" tag and has a space before the tab.
11731 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11732 %{
11733 predicate(UseAPX);
11734 match(Set dst (MulL src1 (LoadL src2)));
11735 effect(KILL cr);
11736 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11737
11738 ins_cost(350);
11739 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11740 ins_encode %{
11741 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11742 %}
11743 ins_pipe(ialu_reg_mem_alu0);
11744 %}
11745
// Long multiply of a value loaded from memory by an immL32 constant
// (dst = [src] * imm): emits the three-operand imulq with an address source.
// No UseAPX predicate is attached.
11746 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11747 %{
11748 match(Set dst (MulL (LoadL src) imm));
11749 effect(KILL cr);
11750
11751 ins_cost(300);
11752 format %{ "imulq $dst, $src, $imm\t# long" %}
11753 ins_encode %{
11754 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11755 %}
11756 ins_pipe(ialu_reg_mem_alu0);
11757 %}
11758
// MulHiL (high half of a long product). One input is pinned to RAX and the
// result to RDX through the operand classes; the encoding is the one-operand
// imulq on src, shown in the format as producing RDX:RAX.
// RAX is both read and destroyed (USE_KILL); flags are killed.
11759 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11760 %{
11761 match(Set dst (MulHiL src rax));
11762 effect(USE_KILL rax, KILL cr);
11763
11764 ins_cost(300);
11765 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11766 ins_encode %{
11767 __ imulq($src$$Register);
11768 %}
11769 ins_pipe(ialu_reg_reg_alu0);
11770 %}
11771
// UMulHiL (high half of an unsigned long product). Same register layout as
// mulHiL_rReg - one input in RAX, result in RDX - but the encoding is the
// one-operand mulq on src. RAX is USE_KILL; flags are killed.
11772 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11773 %{
11774 match(Set dst (UMulHiL src rax));
11775 effect(USE_KILL rax, KILL cr);
11776
11777 ins_cost(300);
11778 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11779 ins_encode %{
11780 __ mulq($src$$Register);
11781 %}
11782 ins_pipe(ialu_reg_reg_alu0);
11783 %}
11784
// Signed int division: dividend and quotient live in RAX, the divisor may be
// any int register except RAX/RDX, and RDX and the flags are killed.
// The format string sketches the emitted sequence: a dividend of 0x80000000
// with a divisor of -1 skips the idivl (RDX is zeroed instead); every other
// case runs cdql followed by idivl. The code itself comes from the cdql_enc
// encoding class, which is not part of this section.
11785 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11786 rFlagsReg cr)
11787 %{
11788 match(Set rax (DivI rax div));
11789 effect(KILL rdx, KILL cr);
11790
11791 ins_cost(30*100+10*100); // XXX
11792 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11793 "jne,s normal\n\t"
11794 "xorl rdx, rdx\n\t"
11795 "cmpl $div, -1\n\t"
11796 "je,s done\n"
11797 "normal: cdql\n\t"
11798 "idivl $div\n"
11799 "done:" %}
11800 ins_encode(cdql_enc(div));
11801 ins_pipe(ialu_reg_reg_alu0);
11802 %}
11803
// Signed long division: dividend and quotient live in RAX, the divisor may be
// any long register except RAX/RDX, and RDX and the flags are killed.
// The format string sketches the emitted sequence: a dividend of
// 0x8000000000000000 with a divisor of -1 skips the idivq (RDX is zeroed
// instead); every other case runs cdqq followed by idivq. The code itself
// comes from the cdqq_enc encoding class, which is not part of this section.
11804 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11805 rFlagsReg cr)
11806 %{
11807 match(Set rax (DivL rax div));
11808 effect(KILL rdx, KILL cr);
11809
11810 ins_cost(30*100+10*100); // XXX
11811 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11812 "cmpq rax, rdx\n\t"
11813 "jne,s normal\n\t"
11814 "xorl rdx, rdx\n\t"
11815 "cmpq $div, -1\n\t"
11816 "je,s done\n"
11817 "normal: cdqq\n\t"
11818 "idivq $div\n"
11819 "done:" %}
11820 ins_encode(cdqq_enc(div));
11821 ins_pipe(ialu_reg_reg_alu0);
11822 %}
11823
// Unsigned int division (UDivI): dividend and quotient in RAX, divisor in any
// int register other than RAX/RDX; RDX and flags are killed. The work is
// delegated to the macro-assembler helper udivI(rax, div, rdx).
11824 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11825 %{
11826 match(Set rax (UDivI rax div));
11827 effect(KILL rdx, KILL cr);
11828
11829 ins_cost(300);
11830 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11831 ins_encode %{
11832 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11833 %}
11834 ins_pipe(ialu_reg_reg_alu0);
11835 %}
11836
// Unsigned long division (UDivL): dividend and quotient in RAX, divisor in any
// long register other than RAX/RDX; RDX and flags are killed. The work is
// delegated to the macro-assembler helper udivL(rax, div, rdx).
11837 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11838 %{
11839 match(Set rax (UDivL rax div));
11840 effect(KILL rdx, KILL cr);
11841
11842 ins_cost(300);
11843 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11844 ins_encode %{
11845 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11846 %}
11847 ins_pipe(ialu_reg_reg_alu0);
11848 %}
11849
11850 // Integer DIVMOD with Register, both quotient and mod results.
// Matches DivModI directly (no Set), with RAX and RDX both declared as
// operands; only the flags are listed as killed. It reuses the same
// cdql_enc encoding and format text as the plain int divide rule.
11851 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11852 rFlagsReg cr)
11853 %{
11854 match(DivModI rax div);
11855 effect(KILL cr);
11856
11857 ins_cost(30*100+10*100); // XXX
11858 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11859 "jne,s normal\n\t"
11860 "xorl rdx, rdx\n\t"
11861 "cmpl $div, -1\n\t"
11862 "je,s done\n"
11863 "normal: cdql\n\t"
11864 "idivl $div\n"
11865 "done:" %}
11866 ins_encode(cdql_enc(div));
11867 ins_pipe(pipe_slow);
11868 %}
11869
11870 // Long DIVMOD with Register, both quotient and mod results.
// Matches DivModL directly (no Set), with RAX and RDX both declared as
// operands; only the flags are listed as killed. It reuses the same
// cdqq_enc encoding and format text as the plain long divide rule.
11871 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11872 rFlagsReg cr)
11873 %{
11874 match(DivModL rax div);
11875 effect(KILL cr);
11876
11877 ins_cost(30*100+10*100); // XXX
11878 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11879 "cmpq rax, rdx\n\t"
11880 "jne,s normal\n\t"
11881 "xorl rdx, rdx\n\t"
11882 "cmpq $div, -1\n\t"
11883 "je,s done\n"
11884 "normal: cdqq\n\t"
11885 "idivq $div\n"
11886 "done:" %}
11887 ins_encode(cdqq_enc(div));
11888 ins_pipe(pipe_slow);
11889 %}
11890
11891 // Unsigned integer DIVMOD with Register, both quotient and mod results.
// Matches UDivModI with RAX and RDX as operands and needs one extra scratch
// register (tmp, declared TEMP, never RAX/RDX). The code is produced by the
// macro-assembler helper udivmodI(rax, div, rdx, tmp).
11892 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11893 no_rax_rdx_RegI div, rFlagsReg cr)
11894 %{
11895 match(UDivModI rax div);
11896 effect(TEMP tmp, KILL cr);
11897
11898 ins_cost(300);
11899 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11900 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11901 %}
11902 ins_encode %{
11903 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11904 %}
11905 ins_pipe(pipe_slow);
11906 %}
11907
11908 // Unsigned long DIVMOD with Register, both quotient and mod results.
// Matches UDivModL with RAX and RDX as operands and needs one extra scratch
// register (tmp, declared TEMP, never RAX/RDX). The code is produced by the
// macro-assembler helper udivmodL(rax, div, rdx, tmp).
11909 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11910 no_rax_rdx_RegL div, rFlagsReg cr)
11911 %{
11912 match(UDivModL rax div);
11913 effect(TEMP tmp, KILL cr);
11914
11915 ins_cost(300);
11916 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11917 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11918 %}
11919 ins_encode %{
11920 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11921 %}
11922 ins_pipe(pipe_slow);
11923 %}
11924
// Signed int remainder: the dividend is taken in RAX, the result is defined in
// RDX, and RAX plus the flags are killed. It shares the cdql_enc encoding
// (not part of this section) with the int divide rule; the format string shows
// the same 0x80000000 / -1 special case followed by cdql and idivl.
11925 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11926 rFlagsReg cr)
11927 %{
11928 match(Set rdx (ModI rax div));
11929 effect(KILL rax, KILL cr);
11930
11931 ins_cost(300); // XXX
11932 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11933 "jne,s normal\n\t"
11934 "xorl rdx, rdx\n\t"
11935 "cmpl $div, -1\n\t"
11936 "je,s done\n"
11937 "normal: cdql\n\t"
11938 "idivl $div\n"
11939 "done:" %}
11940 ins_encode(cdql_enc(div));
11941 ins_pipe(ialu_reg_reg_alu0);
11942 %}
11943
// Signed long remainder: the dividend is taken in RAX, the result is defined
// in RDX, and RAX plus the flags are killed. It shares the cdqq_enc encoding
// (not part of this section) with the long divide rule; the format string
// shows the same min-long / -1 special case followed by cdqq and idivq.
11944 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11945 rFlagsReg cr)
11946 %{
11947 match(Set rdx (ModL rax div));
11948 effect(KILL rax, KILL cr);
11949
11950 ins_cost(300); // XXX
11951 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11952 "cmpq rax, rdx\n\t"
11953 "jne,s normal\n\t"
11954 "xorl rdx, rdx\n\t"
11955 "cmpq $div, -1\n\t"
11956 "je,s done\n"
11957 "normal: cdqq\n\t"
11958 "idivq $div\n"
11959 "done:" %}
11960 ins_encode(cdqq_enc(div));
11961 ins_pipe(ialu_reg_reg_alu0);
11962 %}
11963
// Unsigned int remainder (UModI): dividend in RAX, result defined in RDX;
// RAX and flags are killed. The work is delegated to the macro-assembler
// helper umodI(rax, div, rdx).
11964 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11965 %{
11966 match(Set rdx (UModI rax div));
11967 effect(KILL rax, KILL cr);
11968
11969 ins_cost(300);
11970 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11971 ins_encode %{
11972 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11973 %}
11974 ins_pipe(ialu_reg_reg_alu0);
11975 %}
11976
// Unsigned long remainder (UModL): dividend in RAX, result defined in RDX;
// RAX and flags are killed. The work is delegated to the macro-assembler
// helper umodL(rax, div, rdx).
11977 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11978 %{
11979 match(Set rdx (UModL rax div));
11980 effect(KILL rax, KILL cr);
11981
11982 ins_cost(300);
11983 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11984 ins_encode %{
11985 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11986 %}
11987 ins_pipe(ialu_reg_reg_alu0);
11988 %}
11989
11990 // Integer Shift Instructions
11991 // Shift Left by one, two, three (the count is an immI2 operand).
// Non-APX, in-place form: dst = dst << shift via sall; flags are killed.
11992 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11993 %{
11994 predicate(!UseAPX);
11995 match(Set dst (LShiftI dst shift));
11996 effect(KILL cr);
11997
11998 format %{ "sall $dst, $shift" %}
11999 ins_encode %{
12000 __ sall($dst$$Register, $shift$$constant);
12001 %}
12002 ins_pipe(ialu_reg);
12003 %}
12004
12005 // Shift Left by one, two, three (the count is an immI2 operand).
// APX form with a separate source: dst = src << shift via
// esall(dst, src, shift, false); flags are killed.
12006 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
12007 %{
12008 predicate(UseAPX);
12009 match(Set dst (LShiftI src shift));
12010 effect(KILL cr);
12011 flag(PD::Flag_ndd_demotable_opr1);
12012
12013 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
12014 ins_encode %{
12015 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12016 %}
12017 ins_pipe(ialu_reg);
12018 %}
12019
12020 // Shift Left by 8-bit immediate (immI8 count).
// Non-APX, in-place form: dst = dst << shift via sall; flags are killed.
12021 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12022 %{
12023 predicate(!UseAPX);
12024 match(Set dst (LShiftI dst shift));
12025 effect(KILL cr);
12026
12027 format %{ "sall $dst, $shift" %}
12028 ins_encode %{
12029 __ sall($dst$$Register, $shift$$constant);
12030 %}
12031 ins_pipe(ialu_reg);
12032 %}
12033
12034 // Shift Left by 8-bit immediate (immI8 count).
// APX form with a separate source register: dst = src << shift via
// esall(dst, src, shift, false); flags are killed.
12035 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12036 %{
12037 predicate(UseAPX);
12038 match(Set dst (LShiftI src shift));
12039 effect(KILL cr);
12040 flag(PD::Flag_ndd_demotable_opr1);
12041
12042 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12043 ins_encode %{
12044 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
12045 %}
12046 ins_pipe(ialu_reg);
12047 %}
12048
// APX shift-left of an int loaded from memory by an immI8 count into a
// register: dst = [src] << shift via esall with an address source.
// No ndd_demotable flag is declared for this memory-source form.
12049 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12050 %{
12051 predicate(UseAPX);
12052 match(Set dst (LShiftI (LoadI src) shift));
12053 effect(KILL cr);
12054
12055 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
12056 ins_encode %{
12057 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
12058 %}
12059 ins_pipe(ialu_reg);
12060 %}
12061
12062 // Shift Left by 8-bit immediate, applied directly to memory.
// Matches a StoreI of (LoadI dst << shift) back to the same address and emits
// sall on that address. No UseAPX predicate is attached.
12063 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12064 %{
12065 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12066 effect(KILL cr);
12067
12068 format %{ "sall $dst, $shift" %}
12069 ins_encode %{
12070 __ sall($dst$$Address, $shift$$constant);
12071 %}
12072 ins_pipe(ialu_mem_imm);
12073 %}
12074
12075 // Shift Left by variable count, used only when BMI2 is not available.
// The count operand is constrained to the rcx_RegI class; the encoding passes
// only dst to sall, so the count register is implicit. Flags are killed.
12076 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12077 %{
12078 predicate(!VM_Version::supports_bmi2());
12079 match(Set dst (LShiftI dst shift));
12080 effect(KILL cr);
12081
12082 format %{ "sall $dst, $shift" %}
12083 ins_encode %{
12084 __ sall($dst$$Register);
12085 %}
12086 ins_pipe(ialu_reg_reg);
12087 %}
12088
12089 // Shift Left by variable count on a memory operand (non-BMI2 only).
// Matches a StoreI of (LoadI dst << shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the sall(address) encoding.
12090 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12091 %{
12092 predicate(!VM_Version::supports_bmi2());
12093 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12094 effect(KILL cr);
12095
12096 format %{ "sall $dst, $shift" %}
12097 ins_encode %{
12098 __ sall($dst$$Address);
12099 %}
12100 ins_pipe(ialu_mem_reg);
12101 %}
12102
// BMI2 variable shift-left for ints: dst = src << shift via shlxl, with the
// count in any int register. No flags operand or KILL effect is declared.
12103 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12104 %{
12105 predicate(VM_Version::supports_bmi2());
12106 match(Set dst (LShiftI src shift));
12107
12108 format %{ "shlxl $dst, $src, $shift" %}
12109 ins_encode %{
12110 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12111 %}
12112 ins_pipe(ialu_reg_reg);
12113 %}
12114
// BMI2 variable shift-left of an int loaded from memory: dst = [src] << shift
// via shlxl with an address source. Cost 175; no flags effect is declared.
12115 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12116 %{
12117 predicate(VM_Version::supports_bmi2());
12118 match(Set dst (LShiftI (LoadI src) shift));
12119 ins_cost(175);
12120 format %{ "shlxl $dst, $src, $shift" %}
12121 ins_encode %{
12122 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12123 %}
12124 ins_pipe(ialu_reg_mem);
12125 %}
12126
12127 // Arithmetic Shift Right by 8-bit immediate (immI8 count).
// Non-APX, in-place form: dst = dst >> shift via sarl; flags are killed.
// NOTE(review): the pipe class is ialu_mem_imm although dst is a register -
// confirm this is intended.
12128 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12129 %{
12130 predicate(!UseAPX);
12131 match(Set dst (RShiftI dst shift));
12132 effect(KILL cr);
12133
12134 format %{ "sarl $dst, $shift" %}
12135 ins_encode %{
12136 __ sarl($dst$$Register, $shift$$constant);
12137 %}
12138 ins_pipe(ialu_mem_imm);
12139 %}
12140
12141 // Arithmetic Shift Right by 8-bit immediate (immI8 count).
// APX form with a separate source register: dst = src >> shift via
// esarl(dst, src, shift, false); flags are killed.
12142 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12143 %{
12144 predicate(UseAPX);
12145 match(Set dst (RShiftI src shift));
12146 effect(KILL cr);
12147 flag(PD::Flag_ndd_demotable_opr1);
12148
12149 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12150 ins_encode %{
12151 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12152 %}
12153 ins_pipe(ialu_mem_imm);
12154 %}
12155
// APX arithmetic shift-right of an int loaded from memory by an immI8 count
// into a register: dst = [src] >> shift via esarl with an address source.
// No ndd_demotable flag is declared for this memory-source form.
12156 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12157 %{
12158 predicate(UseAPX);
12159 match(Set dst (RShiftI (LoadI src) shift));
12160 effect(KILL cr);
12161
12162 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12163 ins_encode %{
12164 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12165 %}
12166 ins_pipe(ialu_mem_imm);
12167 %}
12168
12169 // Arithmetic Shift Right by 8-bit immediate, applied directly to memory.
// Matches a StoreI of (LoadI dst >> shift) back to the same address and emits
// sarl on that address. No UseAPX predicate is attached.
12170 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12171 %{
12172 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12173 effect(KILL cr);
12174
12175 format %{ "sarl $dst, $shift" %}
12176 ins_encode %{
12177 __ sarl($dst$$Address, $shift$$constant);
12178 %}
12179 ins_pipe(ialu_mem_imm);
12180 %}
12181
12182 // Arithmetic Shift Right by variable count, used only when BMI2 is not available.
// The count operand is constrained to the rcx_RegI class; the encoding passes
// only dst to sarl, so the count register is implicit. Flags are killed.
12183 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12184 %{
12185 predicate(!VM_Version::supports_bmi2());
12186 match(Set dst (RShiftI dst shift));
12187 effect(KILL cr);
12188
12189 format %{ "sarl $dst, $shift" %}
12190 ins_encode %{
12191 __ sarl($dst$$Register);
12192 %}
12193 ins_pipe(ialu_reg_reg);
12194 %}
12195
12196 // Arithmetic Shift Right by variable count on a memory operand (non-BMI2 only).
// Matches a StoreI of (LoadI dst >> shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the sarl(address) encoding.
12197 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12198 %{
12199 predicate(!VM_Version::supports_bmi2());
12200 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12201 effect(KILL cr);
12202
12203 format %{ "sarl $dst, $shift" %}
12204 ins_encode %{
12205 __ sarl($dst$$Address);
12206 %}
12207 ins_pipe(ialu_mem_reg);
12208 %}
12209
// BMI2 variable arithmetic shift-right for ints: dst = src >> shift via sarxl,
// with the count in any int register. No flags effect is declared.
12210 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12211 %{
12212 predicate(VM_Version::supports_bmi2());
12213 match(Set dst (RShiftI src shift));
12214
12215 format %{ "sarxl $dst, $src, $shift" %}
12216 ins_encode %{
12217 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12218 %}
12219 ins_pipe(ialu_reg_reg);
12220 %}
12221
// BMI2 variable arithmetic shift-right of an int loaded from memory:
// dst = [src] >> shift via sarxl with an address source. Cost 175; no flags
// effect is declared.
12222 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12223 %{
12224 predicate(VM_Version::supports_bmi2());
12225 match(Set dst (RShiftI (LoadI src) shift));
12226 ins_cost(175);
12227 format %{ "sarxl $dst, $src, $shift" %}
12228 ins_encode %{
12229 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12230 %}
12231 ins_pipe(ialu_reg_mem);
12232 %}
12233
12234 // Logical Shift Right by 8-bit immediate (immI8 count).
// Non-APX, in-place form: dst = dst >>> shift via shrl; flags are killed.
12235 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12236 %{
12237 predicate(!UseAPX);
12238 match(Set dst (URShiftI dst shift));
12239 effect(KILL cr);
12240
12241 format %{ "shrl $dst, $shift" %}
12242 ins_encode %{
12243 __ shrl($dst$$Register, $shift$$constant);
12244 %}
12245 ins_pipe(ialu_reg);
12246 %}
12247
12248 // Logical Shift Right by 8-bit immediate (immI8 count).
// APX form with a separate source register: dst = src >>> shift via
// eshrl(dst, src, shift, false); flags are killed.
12249 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12250 %{
12251 predicate(UseAPX);
12252 match(Set dst (URShiftI src shift));
12253 effect(KILL cr);
12254 flag(PD::Flag_ndd_demotable_opr1);
12255
12256 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12257 ins_encode %{
12258 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12259 %}
12260 ins_pipe(ialu_reg);
12261 %}
12262
// APX logical shift-right of an int loaded from memory by an immI8 count into
// a register: dst = [src] >>> shift via eshrl with an address source.
// No ndd_demotable flag is declared for this memory-source form.
12263 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12264 %{
12265 predicate(UseAPX);
12266 match(Set dst (URShiftI (LoadI src) shift));
12267 effect(KILL cr);
12268
12269 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12270 ins_encode %{
12271 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12272 %}
12273 ins_pipe(ialu_reg);
12274 %}
12275
12276 // Logical Shift Right by 8-bit immediate, applied directly to memory.
// Matches a StoreI of (LoadI dst >>> shift) back to the same address and emits
// shrl on that address. No UseAPX predicate is attached.
12277 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12278 %{
12279 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12280 effect(KILL cr);
12281
12282 format %{ "shrl $dst, $shift" %}
12283 ins_encode %{
12284 __ shrl($dst$$Address, $shift$$constant);
12285 %}
12286 ins_pipe(ialu_mem_imm);
12287 %}
12288
12289 // Logical Shift Right by variable count, used only when BMI2 is not available.
// The count operand is constrained to the rcx_RegI class; the encoding passes
// only dst to shrl, so the count register is implicit. Flags are killed.
12290 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12291 %{
12292 predicate(!VM_Version::supports_bmi2());
12293 match(Set dst (URShiftI dst shift));
12294 effect(KILL cr);
12295
12296 format %{ "shrl $dst, $shift" %}
12297 ins_encode %{
12298 __ shrl($dst$$Register);
12299 %}
12300 ins_pipe(ialu_reg_reg);
12301 %}
12302
12303 // Logical Shift Right by variable count on a memory operand (non-BMI2 only).
// Matches a StoreI of (LoadI dst >>> shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the shrl(address) encoding.
12304 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12305 %{
12306 predicate(!VM_Version::supports_bmi2());
12307 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12308 effect(KILL cr);
12309
12310 format %{ "shrl $dst, $shift" %}
12311 ins_encode %{
12312 __ shrl($dst$$Address);
12313 %}
12314 ins_pipe(ialu_mem_reg);
12315 %}
12316
// BMI2 variable logical shift-right for ints: dst = src >>> shift via shrxl,
// with the count in any int register. No flags effect is declared.
12317 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12318 %{
12319 predicate(VM_Version::supports_bmi2());
12320 match(Set dst (URShiftI src shift));
12321
12322 format %{ "shrxl $dst, $src, $shift" %}
12323 ins_encode %{
12324 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12325 %}
12326 ins_pipe(ialu_reg_reg);
12327 %}
12328
// BMI2 variable logical shift-right of an int loaded from memory:
// dst = [src] >>> shift via shrxl with an address source. Cost 175; no flags
// effect is declared.
12329 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12330 %{
12331 predicate(VM_Version::supports_bmi2());
12332 match(Set dst (URShiftI (LoadI src) shift));
12333 ins_cost(175);
12334 format %{ "shrxl $dst, $src, $shift" %}
12335 ins_encode %{
12336 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12337 %}
12338 ins_pipe(ialu_reg_mem);
12339 %}
12340
12341 // Long Shift Instructions
12342 // Shift Left by one, two, three (the count is an immI2 operand).
// Non-APX, in-place long form: dst = dst << shift via salq; flags are killed.
12343 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12344 %{
12345 predicate(!UseAPX);
12346 match(Set dst (LShiftL dst shift));
12347 effect(KILL cr);
12348
12349 format %{ "salq $dst, $shift" %}
12350 ins_encode %{
12351 __ salq($dst$$Register, $shift$$constant);
12352 %}
12353 ins_pipe(ialu_reg);
12354 %}
12355
12356 // Shift Left by one, two, three (the count is an immI2 operand).
// APX long form with a separate source: dst = src << shift via
// esalq(dst, src, shift, false); flags are killed.
12357 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12358 %{
12359 predicate(UseAPX);
12360 match(Set dst (LShiftL src shift));
12361 effect(KILL cr);
12362 flag(PD::Flag_ndd_demotable_opr1);
12363
12364 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12365 ins_encode %{
12366 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12367 %}
12368 ins_pipe(ialu_reg);
12369 %}
12370
12371 // Shift Left by 8-bit immediate (immI8 count).
// Non-APX, in-place long form: dst = dst << shift via salq; flags are killed.
12372 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12373 %{
12374 predicate(!UseAPX);
12375 match(Set dst (LShiftL dst shift));
12376 effect(KILL cr);
12377
12378 format %{ "salq $dst, $shift" %}
12379 ins_encode %{
12380 __ salq($dst$$Register, $shift$$constant);
12381 %}
12382 ins_pipe(ialu_reg);
12383 %}
12384
12385 // Shift Left by 8-bit immediate (immI8 count).
// APX long form with a separate source register: dst = src << shift via
// esalq(dst, src, shift, false); flags are killed.
12386 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12387 %{
12388 predicate(UseAPX);
12389 match(Set dst (LShiftL src shift));
12390 effect(KILL cr);
12391 flag(PD::Flag_ndd_demotable_opr1);
12392
12393 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12394 ins_encode %{
12395 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12396 %}
12397 ins_pipe(ialu_reg);
12398 %}
12399
// APX shift-left of a long loaded from memory by an immI8 count into a
// register: dst = [src] << shift via esalq with an address source.
// No ndd_demotable flag is declared for this memory-source form.
12400 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12401 %{
12402 predicate(UseAPX);
12403 match(Set dst (LShiftL (LoadL src) shift));
12404 effect(KILL cr);
12405
12406 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12407 ins_encode %{
12408 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12409 %}
12410 ins_pipe(ialu_reg);
12411 %}
12412
12413 // Shift Left by 8-bit immediate, applied directly to a long in memory.
// Matches a StoreL of (LoadL dst << shift) back to the same address and emits
// salq on that address. No UseAPX predicate is attached.
12414 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12415 %{
12416 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12417 effect(KILL cr);
12418
12419 format %{ "salq $dst, $shift" %}
12420 ins_encode %{
12421 __ salq($dst$$Address, $shift$$constant);
12422 %}
12423 ins_pipe(ialu_mem_imm);
12424 %}
12425
12426 // Shift Left by variable count (long), used only when BMI2 is not available.
// The count operand is an int constrained to rcx_RegI; the encoding passes
// only dst to salq, so the count register is implicit. Flags are killed.
12427 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12428 %{
12429 predicate(!VM_Version::supports_bmi2());
12430 match(Set dst (LShiftL dst shift));
12431 effect(KILL cr);
12432
12433 format %{ "salq $dst, $shift" %}
12434 ins_encode %{
12435 __ salq($dst$$Register);
12436 %}
12437 ins_pipe(ialu_reg_reg);
12438 %}
12439
12440 // Shift Left by variable count on a long in memory (non-BMI2 only).
// Matches a StoreL of (LoadL dst << shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the salq(address) encoding.
12441 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12442 %{
12443 predicate(!VM_Version::supports_bmi2());
12444 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12445 effect(KILL cr);
12446
12447 format %{ "salq $dst, $shift" %}
12448 ins_encode %{
12449 __ salq($dst$$Address);
12450 %}
12451 ins_pipe(ialu_mem_reg);
12452 %}
12453
// BMI2 variable shift-left for longs: dst = src << shift via shlxq, with the
// count in any int register (rRegI). No flags effect is declared.
12454 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12455 %{
12456 predicate(VM_Version::supports_bmi2());
12457 match(Set dst (LShiftL src shift));
12458
12459 format %{ "shlxq $dst, $src, $shift" %}
12460 ins_encode %{
12461 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12462 %}
12463 ins_pipe(ialu_reg_reg);
12464 %}
12465
// BMI2 variable shift-left of a long loaded from memory: dst = [src] << shift
// via shlxq with an address source. Cost 175; no flags effect is declared.
12466 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12467 %{
12468 predicate(VM_Version::supports_bmi2());
12469 match(Set dst (LShiftL (LoadL src) shift));
12470 ins_cost(175);
12471 format %{ "shlxq $dst, $src, $shift" %}
12472 ins_encode %{
12473 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12474 %}
12475 ins_pipe(ialu_reg_mem);
12476 %}
12477
12478 // Arithmetic Shift Right by immediate. The count operand is a full immI
// (not immI8); the encoding masks it with 0x3F and narrows it to an unsigned
// char before passing it to sarq. Non-APX, in-place form; flags are killed.
12479 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12480 %{
12481 predicate(!UseAPX);
12482 match(Set dst (RShiftL dst shift));
12483 effect(KILL cr);
12484
12485 format %{ "sarq $dst, $shift" %}
12486 ins_encode %{
12487 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12488 %}
12489 ins_pipe(ialu_mem_imm);
12490 %}
12491
12492 // Arithmetic Shift Right by immediate, APX form with a separate source.
// The count operand is a full immI; the encoding masks it with 0x3F and
// narrows it to an unsigned char before passing it to esarq. Flags are killed.
12493 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12494 %{
12495 predicate(UseAPX);
12496 match(Set dst (RShiftL src shift));
12497 effect(KILL cr);
12498 flag(PD::Flag_ndd_demotable_opr1);
12499
12500 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12501 ins_encode %{
12502 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12503 %}
12504 ins_pipe(ialu_mem_imm);
12505 %}
12506
// APX arithmetic shift-right of a long loaded from memory by an immI count
// into a register: dst = [src] >> (shift & 0x3F) via esarq with an address
// source. No ndd_demotable flag is declared for this memory-source form.
12507 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12508 %{
12509 predicate(UseAPX);
12510 match(Set dst (RShiftL (LoadL src) shift));
12511 effect(KILL cr);
12512
12513 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12514 ins_encode %{
12515 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12516 %}
12517 ins_pipe(ialu_mem_imm);
12518 %}
12519
12520 // Arithmetic Shift Right by immediate, applied directly to a long in memory.
// Matches a StoreL of (LoadL dst >> shift) back to the same address. The count
// is an immI operand, masked with 0x3F in the encoding before sarq is emitted.
12521 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12522 %{
12523 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12524 effect(KILL cr);
12525
12526 format %{ "sarq $dst, $shift" %}
12527 ins_encode %{
12528 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12529 %}
12530 ins_pipe(ialu_mem_imm);
12531 %}
12532
12533 // Arithmetic Shift Right by variable count (long), used only when BMI2 is not available.
// The count operand is an int constrained to rcx_RegI; the encoding passes
// only dst to sarq, so the count register is implicit. Flags are killed.
12534 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12535 %{
12536 predicate(!VM_Version::supports_bmi2());
12537 match(Set dst (RShiftL dst shift));
12538 effect(KILL cr);
12539
12540 format %{ "sarq $dst, $shift" %}
12541 ins_encode %{
12542 __ sarq($dst$$Register);
12543 %}
12544 ins_pipe(ialu_reg_reg);
12545 %}
12546
12547 // Arithmetic Shift Right by variable count on a long in memory (non-BMI2 only).
// Matches a StoreL of (LoadL dst >> shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the sarq(address) encoding.
12548 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12549 %{
12550 predicate(!VM_Version::supports_bmi2());
12551 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12552 effect(KILL cr);
12553
12554 format %{ "sarq $dst, $shift" %}
12555 ins_encode %{
12556 __ sarq($dst$$Address);
12557 %}
12558 ins_pipe(ialu_mem_reg);
12559 %}
12560
// BMI2 variable arithmetic shift-right for longs: dst = src >> shift via
// sarxq, with the count in any int register. No flags effect is declared.
12561 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12562 %{
12563 predicate(VM_Version::supports_bmi2());
12564 match(Set dst (RShiftL src shift));
12565
12566 format %{ "sarxq $dst, $src, $shift" %}
12567 ins_encode %{
12568 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12569 %}
12570 ins_pipe(ialu_reg_reg);
12571 %}
12572
// BMI2 variable arithmetic shift-right of a long loaded from memory:
// dst = [src] >> shift via sarxq with an address source. Cost 175; no flags
// effect is declared.
12573 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12574 %{
12575 predicate(VM_Version::supports_bmi2());
12576 match(Set dst (RShiftL (LoadL src) shift));
12577 ins_cost(175);
12578 format %{ "sarxq $dst, $src, $shift" %}
12579 ins_encode %{
12580 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12581 %}
12582 ins_pipe(ialu_reg_mem);
12583 %}
12584
12585 // Logical Shift Right by 8-bit immediate (immI8 count).
// Non-APX, in-place long form: dst = dst >>> shift via shrq; flags are killed.
12586 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12587 %{
12588 predicate(!UseAPX);
12589 match(Set dst (URShiftL dst shift));
12590 effect(KILL cr);
12591
12592 format %{ "shrq $dst, $shift" %}
12593 ins_encode %{
12594 __ shrq($dst$$Register, $shift$$constant);
12595 %}
12596 ins_pipe(ialu_reg);
12597 %}
12598
12599 // Logical Shift Right by 8-bit immediate (immI8 count).
// APX long form with a separate source register: dst = src >>> shift via
// eshrq(dst, src, shift, false); flags are killed.
12600 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12601 %{
12602 predicate(UseAPX);
12603 match(Set dst (URShiftL src shift));
12604 effect(KILL cr);
12605 flag(PD::Flag_ndd_demotable_opr1);
12606
12607 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12608 ins_encode %{
12609 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12610 %}
12611 ins_pipe(ialu_reg);
12612 %}
12613
// APX logical shift-right of a long loaded from memory by an immI8 count into
// a register: dst = [src] >>> shift via eshrq with an address source.
// No ndd_demotable flag is declared for this memory-source form.
12614 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12615 %{
12616 predicate(UseAPX);
12617 match(Set dst (URShiftL (LoadL src) shift));
12618 effect(KILL cr);
12619
12620 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12621 ins_encode %{
12622 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12623 %}
12624 ins_pipe(ialu_reg);
12625 %}
12626
12627 // Logical Shift Right by 8-bit immediate, applied directly to a long in memory.
// Matches a StoreL of (LoadL dst >>> shift) back to the same address and emits
// shrq on that address. No UseAPX predicate is attached.
12628 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12629 %{
12630 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12631 effect(KILL cr);
12632
12633 format %{ "shrq $dst, $shift" %}
12634 ins_encode %{
12635 __ shrq($dst$$Address, $shift$$constant);
12636 %}
12637 ins_pipe(ialu_mem_imm);
12638 %}
12639
12640 // Logical Shift Right by variable count (long), used only when BMI2 is not available.
// The count operand is an int constrained to rcx_RegI; the encoding passes
// only dst to shrq, so the count register is implicit. Flags are killed.
12641 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12642 %{
12643 predicate(!VM_Version::supports_bmi2());
12644 match(Set dst (URShiftL dst shift));
12645 effect(KILL cr);
12646
12647 format %{ "shrq $dst, $shift" %}
12648 ins_encode %{
12649 __ shrq($dst$$Register);
12650 %}
12651 ins_pipe(ialu_reg_reg);
12652 %}
12653
12654 // Logical Shift Right by variable count on a long in memory (non-BMI2 only).
// Matches a StoreL of (LoadL dst >>> shift) back to the same address; the count
// is constrained to rcx_RegI and is implicit in the shrq(address) encoding.
12655 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12656 %{
12657 predicate(!VM_Version::supports_bmi2());
12658 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12659 effect(KILL cr);
12660
12661 format %{ "shrq $dst, $shift" %}
12662 ins_encode %{
12663 __ shrq($dst$$Address);
12664 %}
12665 ins_pipe(ialu_mem_reg);
12666 %}
12667
// BMI2 variable logical shift-right for longs: dst = src >>> shift via shrxq,
// with the count in any int register. No flags effect is declared.
12668 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12669 %{
12670 predicate(VM_Version::supports_bmi2());
12671 match(Set dst (URShiftL src shift));
12672
12673 format %{ "shrxq $dst, $src, $shift" %}
12674 ins_encode %{
12675 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12676 %}
12677 ins_pipe(ialu_reg_reg);
12678 %}
12679
// BMI2 variable logical shift-right of a long loaded from memory:
// dst = [src] >>> shift via shrxq with an address source. Cost 175; no flags
// effect is declared.
12680 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12681 %{
12682 predicate(VM_Version::supports_bmi2());
12683 match(Set dst (URShiftL (LoadL src) shift));
12684 ins_cost(175);
12685 format %{ "shrxq $dst, $src, $shift" %}
12686 ins_encode %{
12687 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12688 %}
12689 ins_pipe(ialu_reg_mem);
12690 %}
12691
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode; the whole
// pattern is replaced by a single movsbl.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12704
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode; the whole
// pattern is replaced by a single movswl.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12717
12718 // ROL/ROR instructions
12719
// Rotate an int register left by an 8-bit immediate, in place, with roll.
// Selected only when BMI2 is unavailable and the node type is T_INT.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12732
// Rotate an int register left by a constant (BMI2, non-APX). The left
// rotate is emitted as rorxl with the complementary count
// 32 - (shift & 31). No flags operand is declared.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate amount.
    const int ror_count = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12744
// Rotate an int loaded from memory left by a constant (BMI2). Emitted as
// rorxl on the memory operand with count 32 - (shift & 31); cost is 175.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  match(Set dst (RotateLeft (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate amount.
    const int ror_count = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12757
// Rotate an int register left by a variable count, in place (non-APX).
// The count operand is restricted to the rcx_RegI class; roll receives only
// the destination register.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12770
// Rotate an int register left by a variable count into a separate
// destination (APX NDD form). The count operand is restricted to rcx_RegI;
// operand 1 (src) is marked as NDD-demotable.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  flag(PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12785
// Rotate an int register right by an 8-bit immediate, in place, with rorl.
// Selected only when BMI2 is unavailable and the node type is T_INT.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12798
// Rotate an int register right by a constant using the three-operand rorxl
// (BMI2, non-APX). No flags operand is declared.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  match(Set dst (RotateRight src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12810
// Rotate an int loaded from memory right by a constant using rorxl with a
// memory source (BMI2); cost is 175.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  match(Set dst (RotateRight (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12822
// Rotate an int register right by a variable count, in place (non-APX).
// The count operand is restricted to the rcx_RegI class; rorl receives only
// the destination register.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12835
// Rotate an int register right by a variable count into a separate
// destination (APX NDD form). The count operand is restricted to rcx_RegI;
// operand 1 (src) is marked as NDD-demotable.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  flag(PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12850
// Rotate a long register left by an 8-bit immediate, in place, with rolq.
// Selected only when BMI2 is unavailable and the node type is T_LONG.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12863
// Rotate a long register left by a constant (BMI2, non-APX). The left
// rotate is emitted as rorxq with the complementary count
// 64 - (shift & 63). No flags operand is declared.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate amount.
    const int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12875
// Rotate a long loaded from memory left by a constant (BMI2). Emitted as
// rorxq on the memory operand with count 64 - (shift & 63); cost is 175.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  match(Set dst (RotateLeft (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Convert the left-rotate amount into the equivalent right-rotate amount.
    const int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12888
// Rotate a long register left by a variable count, in place (non-APX).
// The count operand is restricted to the rcx_RegI class; rolq receives only
// the destination register.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12902
// Rotate a long register left by a variable count into a separate
// destination (APX NDD form). The count operand is restricted to rcx_RegI;
// operand 1 (src) is marked as NDD-demotable.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  flag(PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12917
// Rotate a long register right by an 8-bit immediate, in place, with rorq.
// Selected only when BMI2 is unavailable and the node type is T_LONG.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12930
// Rotate a long register right by a constant using the three-operand rorxq.
// No flags operand is declared.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  // Restricted to non-APX like the other register-source rorx rotate rules
  // (rolI_immI8, rorI_immI8, rolL_immI8); this rule previously lacked the
  // !UseAPX guard and was the only one still selectable under APX.
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12942
// Rotate a long loaded from memory right by a constant using rorxq with a
// memory source (BMI2); cost is 175.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  match(Set dst (RotateRight (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12954
// Rotate a long register right by a variable count, in place (non-APX).
// The count operand is restricted to the rcx_RegI class; rorq receives only
// the destination register.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12967
// Rotate a long register right by a variable count into a separate
// destination (APX NDD form). The count operand is restricted to rcx_RegI;
// operand 1 (src) is marked as NDD-demotable.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  flag(PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12982
12983 //----------------------------- CompressBits/ExpandBits ------------------------
12984
// CompressBits on longs with a register mask, emitted as pextq.
// Applies only when the node's bottom type is a long.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12994
// ExpandBits on longs with a register mask, emitted as pdepq.
// Applies only when the node's bottom type is a long.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
13004
// CompressBits on longs where the mask is loaded from memory; the load is
// folded into pextq. Applies only when the node's bottom type is a long.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  match(Set dst (CompressBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
13014
// ExpandBits on longs where the mask is loaded from memory; the load is
// folded into pdepq. Applies only when the node's bottom type is a long.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  match(Set dst (ExpandBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
13024
13025
13026 // Logical Instructions
13027
13028 // Integer Logical Instructions
13029
13030 // And Instructions
// Int AND of two registers, destructive two-operand form (non-APX): the
// result overwrites dst. The flag() clause lists the condition-flag
// attributes recorded for the instruction.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13045
// Int AND of two registers into a separate destination, using the APX
// New Data Destination (NDD) form. Both source operands are marked as
// NDD-demotable.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13061
// Int AND with the constant 255, implemented with movzbl instead of an
// and instruction. No flags operand is declared.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));
  ins_pipe(ialu_reg);

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
%}
13073
// Int AND with the constant 255 followed by ConvI2L, folded into a single
// movzbl that writes the long destination.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));
  ins_pipe(ialu_reg);

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
%}
13085
// Int AND with the constant 65535, implemented with movzwl instead of an
// and instruction. No flags operand is declared.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));
  ins_pipe(ialu_reg);

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
%}
13097
// Int AND with the constant 65535 followed by ConvI2L, folded into a single
// movzwl that writes the long destination.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));
  ins_pipe(ialu_reg);

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
%}
13109
// ConvI2L of an int AND with an immI_Pow2M1 mask (BMI2 only). The explicit
// int-to-long conversion is skipped: the bit count log2(mask + 1) is loaded
// into a temporary register and bzhiq is applied to src with that count.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  ins_cost(125);

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    // Number of low bits covered by the mask.
    const int bit_count = exact_log2($mask$$constant + 1);
    __ movl($tmp$$Register, bit_count);
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13124
// Int AND of a register with an immediate, destructive two-operand form
// (non-APX): the result overwrites dst.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13139
// Int AND of a register with an immediate into a separate destination
// (APX NDD form). Operand 1 (the register source) is marked NDD-demotable.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13153
// Int AND of a value loaded from memory with an immediate, result written
// to a register (APX NDD form). The load is folded into eandl.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (AndI (LoadI src1) src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13167
// Int AND of a register with a value loaded from memory, destructive
// two-operand form (non-APX). The load is folded into andl; cost is 150.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13183
// Int AND of a register with a value loaded from memory into a separate
// destination (APX NDD form). The load is folded into eandl; cost is 150.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (AndI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13198
// Byte AND of memory with a register: the load / and / store-back of one
// address is emitted as a single andb on that address. Cost is 150.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13213
// Int AND of memory with a register: the load / and / store-back of one
// address is emitted as a single andl on that address. Cost is 150.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13227
// Int AND of memory with an immediate: the load / and / store-back of one
// address is emitted as a single andl on that address. Cost is 125.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13242
13243 // BMI1 instructions
// Int and-not: (src1 ^ -1) & load(src2), emitted as andnl with a memory
// second source. Requires BMI1 and AVX support; cost is 125.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13258
// Int and-not: (src1 ^ -1) & src2, emitted as andnl on registers.
// Requires BMI1 and AVX support.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13272
// Matches (0 - src) & src on int registers and emits it as blsil.
// Requires BMI1 and AVX support.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13286
// Matches (0 - load(src)) & load(src) on an int memory operand and emits it
// as blsil with a memory source. Requires BMI1 and AVX support; cost is 125.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13301
// Matches (load(src) + -1) ^ load(src) on an int memory operand and emits
// it as blsmskl with a memory source. Requires BMI1 and AVX support;
// cost is 125.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13317
// Matches (src + -1) ^ src on int registers and emits it as blsmskl.
// Requires BMI1 and AVX support.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13333
// Matches (src + -1) & src on int registers and emits it as blsrl.
// Requires BMI1 and AVX support.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only form: use the register pipeline class, as the register
  // forms of blsil and blsmskl do (this was ialu_reg_mem).
  ins_pipe(ialu_reg);
%}
13349
// Matches (load(src) + -1) & load(src) on an int memory operand and emits
// it as blsrl with a memory source. Requires BMI1 and AVX support;
// cost is 125.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use the register/memory pipeline class, as the
  // memory forms of blsil and blsmskl do (this was ialu_reg).
  ins_pipe(ialu_reg_mem);
%}
13366
13367 // Or Instructions
// Int OR of two registers, destructive two-operand form (non-APX): the
// result overwrites dst.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13382
// Int OR of two registers into a separate destination, using the APX
// New Data Destination (NDD) form. Both source operands are marked as
// NDD-demotable.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13397
// Int OR of a register with an immediate, destructive two-operand form
// (non-APX): the result overwrites dst.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13412
// Int OR of a register with an immediate into a separate destination
// (APX NDD form). Operand 1 (the register source) is marked NDD-demotable.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13426
// Int OR with the immediate as the first ideal input and the register as
// the second (APX NDD form). The encoding swaps them so the register is the
// first assembler source.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  // The demotable operand must be the register source, which is operand 2
  // here; operand 1 is the immediate (this was Flag_ndd_demotable_opr1).
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13440
// Int OR of a value loaded from memory with an immediate, result written
// to a register (APX NDD form). The load is folded into eorl.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (OrI (LoadI src1) src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13454
// Int OR of a register with a value loaded from memory, destructive
// two-operand form (non-APX). The load is folded into orl; cost is 150.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13470
// Int OR of a register with a value loaded from memory into a separate
// destination (APX NDD form). The load is folded into eorl; cost is 150.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13485
// Byte OR of memory with a register: the load / or / store-back of one
// address is emitted as a single orb on that address. Cost is 150.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13500
// Int OR of memory with a register: the load / or / store-back of one
// address is emitted as a single orl on that address. Cost is 150.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13514
// Int OR of memory with an immediate: the load / or / store-back of one
// address is emitted as a single orl on that address. Cost is 125.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13529
13530 // Xor Instructions
// Int XOR of two registers, destructive two-operand form (non-APX): the
// result overwrites dst.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13545
// Int XOR of two registers into a separate destination, using the APX
// New Data Destination (NDD) form. Both source operands are marked as
// NDD-demotable.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13560
// Int XOR of a register with the constant -1, emitted in place as notl
// (non-APX). No flags operand is declared.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13573
// Int XOR of a register with the constant -1 into a separate destination,
// emitted as enotl (APX NDD form). Operand 1 (src) is marked NDD-demotable;
// no flags operand is declared.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13586
// Int XOR of a register with an immediate other than -1, destructive
// two-operand form (non-APX).
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is excluded here so that xorI_rReg_im1 is chosen for it
  // independently of instruction cost.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13602
// XorI of a register with an int immediate, APX NDD form (UseAPX on).
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  // The constant -1 is excluded here so that case is always taken by
  // xorI_rReg_im1_ndd, independent of the rule costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorl(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13617
// XorI of a loaded int (LoadI) with an int immediate, result in a register.
// APX NDD form, selected only when UseAPX is set.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src1$$Address;
    __ exorl(Rdst, mem, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13633
// XorI of a register with a loaded int (LoadI), in place (UseAPX off).
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  predicate(!UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ xorl(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
13649
// XorI of a register with a loaded int (LoadI), APX NDD form (UseAPX on).
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LoadI src2)));
  predicate(UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    Address  mem  = $src2$$Address;
    __ exorl(Rdst, Rsrc, mem, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13664
// Byte read-modify-write: StoreB of (LoadB dst) ^ src back to the same
// memory operand, emitted as a single xorb on memory.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    Address  mem  = $dst$$Address;
    Register Rsrc = $src$$Register;
    __ xorb(mem, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13679
// Int read-modify-write: StoreI of (LoadI dst) ^ src back to the same
// memory operand, emitted as a single xorl on memory.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Address  mem  = $dst$$Address;
    Register Rsrc = $src$$Register;
    __ xorl(mem, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13693
// Int read-modify-write with an immediate: StoreI of (LoadI dst) ^ src
// back to the same memory operand.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ xorl(mem, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13708
13709
13710 // Long Logical Instructions
13711
13712 // And Instructions
// AndL of two registers, in place (UseAPX off).
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ andq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13727
// AndL of two registers in the APX New Data Destination (NDD) form.
// Only selected when UseAPX is set; dst is separate from both sources.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ eandq(Rdst, Rsrc1, Rsrc2, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13743
// AndL of a register with the mask 255 (immL_255), emitted as movzbl.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // No REX.W needed: movzbl already zeroes the upper 32 bits.
    __ movzbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13756
// AndL of a register with the mask 65535 (immL_65535), emitted as movzwl.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // No REX.W needed: movzwl already zeroes the upper 32 bits.
    __ movzwl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13769
// AndL of a register with an immL32 immediate, in place (UseAPX off).
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13784
// AndL of a register with an immL32 immediate, APX NDD form (UseAPX on).
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eandq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13798
// AndL of a loaded long (LoadL) with an immL32 immediate, result in a
// register. APX NDD form, selected only when UseAPX is set.
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src1$$Address;
    __ eandq(Rdst, mem, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13812
// AndL of a register with a loaded long (LoadL), in place (UseAPX off).
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ andq(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
13828
// AndL of a register with a loaded long (LoadL), APX NDD form (UseAPX on).
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    Address  mem  = $src2$$Address;
    __ eandq(Rdst, Rsrc, mem, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13843
// Long read-modify-write: StoreL of (LoadL dst) & src back to the same
// memory operand, emitted as a single andq on memory.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Address  mem  = $dst$$Address;
    Register Rsrc = $src$$Register;
    __ andq(mem, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13858
// Long read-modify-write with an immL32 immediate: StoreL of
// (LoadL dst) & src back to the same memory operand.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ andq(mem, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13873
// Long read-modify-write AND with a constant whose complement is a power
// of two (immL_NotPow2), emitted as btrq on the memory operand.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // Restrict the rule to genuine 64-bit immediates (not(con) being a power
  // of 2): plain AND/OR already works well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ btrq(mem, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13890
13891 // BMI1 instructions
// AndL of (src1 ^ -1) with a loaded long, emitted as andnq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    Address  mem  = $src2$$Address;
    __ andnq(Rdst, Rsrc, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
13906
// AndL of (src1 ^ -1) with a register, emitted as andnq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // All operands are registers, so use the register-register pipeline class
  // (as the other two-source register forms do) rather than the
  // register-memory class.
  ins_pipe(ialu_reg_reg);
%}
13920
// AndL of (0 - src) with src, register source, emitted as blsiq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsiq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13934
// AndL of (0 - LoadL src) with (LoadL src), memory source, emitted as blsiq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ blsiq(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
13949
// XorL of (LoadL src + -1) with (LoadL src), memory source, emitted as
// blsmskq. Requires both BMI1 and AVX support according to VM_Version.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ blsmskq(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
13965
// XorL of (src + -1) with src, register source, emitted as blsmskq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13981
// AndL of (src + -1) with src, register source, emitted as blsrq.
// Requires both BMI1 and AVX support according to VM_Version.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsrq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13997
// AndL of (LoadL src + -1) with (LoadL src), memory source, emitted as
// blsrq. Requires both BMI1 and AVX support according to VM_Version.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline
  // class, consistent with blsiL_rReg_mem and blsmskL_rReg_mem.
  ins_pipe(ialu_reg_mem);
%}
14014
14015 // Or Instructions
// OrL of two registers, in place (UseAPX off).
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14030
// OrL of two registers in the APX New Data Destination (NDD) form.
// Only selected when UseAPX is set; dst is separate from both sources.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ eorq(Rdst, Rsrc1, Rsrc2, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14046
// OrL of a long register with a pointer cast to an integer (CastP2X),
// in place (UseAPX off). The pointer operand is any_RegP so that R15
// (the TLS register) can be matched without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rptr = $src$$Register;
    __ orq(Rdst, Rptr);
  %}
  ins_pipe(ialu_reg_reg);
%}
14060
// OrL of a long register with a pointer cast to an integer (CastP2X),
// APX NDD form (UseAPX on).
//
// src1 is the long input of the OrL and is therefore an rRegL, mirroring
// dst in the non-NDD rule orL_rReg_castP2X; declaring it as a pointer
// register class would not correspond to a long-typed OrL input. Only src2,
// the CastP2X input, is a pointer operand (any_RegP).
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14073
// OrL of a register with an immL32 immediate, in place (UseAPX off).
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14088
// OrL of a register with an immL32 immediate, APX NDD form (UseAPX on).
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14102
// OrL with the immL32 immediate as the first match operand and the
// register as the second, APX NDD form (UseAPX on). The emitted eorq takes
// the register (src2) first and the constant (src1) second.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src2$$Register;
    __ eorq(Rdst, Rsrc, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14116
// OrL of a loaded long (LoadL) with an immL32 immediate, result in a
// register. APX NDD form, selected only when UseAPX is set.
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src1$$Address;
    __ eorq(Rdst, mem, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14131
// OrL of a register with a loaded long (LoadL), in place (UseAPX off).
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ orq(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
14147
// OrL of a register with a loaded long (LoadL), APX NDD form (UseAPX on).
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    Address  mem  = $src2$$Address;
    __ eorq(Rdst, Rsrc, mem, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14162
// Long read-modify-write: StoreL of (LoadL dst) | src back to the same
// memory operand, emitted as a single orq on memory.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Address  mem  = $dst$$Address;
    Register Rsrc = $src$$Register;
    __ orq(mem, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
14177
// Long read-modify-write with an immL32 immediate: StoreL of
// (LoadL dst) | src back to the same memory operand.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ orq(mem, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14192
// Long read-modify-write OR with a power-of-two constant (immL_Pow2),
// emitted as btsq on the memory operand.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));

  // Restrict the rule to genuine 64-bit power-of-2 immediates: plain AND/OR
  // already works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ btsq(mem, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
14209
14210 // Xor Instructions
// XorL of two registers, in place (UseAPX off).
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
14225
// XorL of two registers in the APX New Data Destination (NDD) form.
// Only selected when UseAPX is set; dst is separate from both sources.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ exorq(Rdst, Rsrc1, Rsrc2, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14240
// XorL of a register with the constant -1 (immL_M1), in place.
// Used when UseAPX is off; emitted as a single notq on dst.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);

  format %{ "notq $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
14253
// XorL of a register with the constant -1 (immL_M1), APX NDD form.
// Used when UseAPX is on; emitted as enotq with a separate destination.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
14266
// XorL of a register with an immL32 immediate, in place (UseAPX off).
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  // The constant -1 is excluded here so that case is always taken by
  // xorL_rReg_im1, independent of the rule costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14282
// XorL of a register with an immL32 immediate, APX NDD form (UseAPX on).
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  // The constant -1 is excluded here so that case is always taken by
  // xorL_rReg_im1_ndd, independent of the rule costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14297
// XorL of a loaded long (LoadL) with an immL32 immediate, result in a
// register. APX NDD form, selected only when UseAPX is set.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src1$$Address;
    __ exorq(Rdst, mem, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14313
// XorL of a register with a loaded long (LoadL), in place (UseAPX off).
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Address  mem  = $src$$Address;
    __ xorq(Rdst, mem);
  %}
  ins_pipe(ialu_reg_mem);
%}
14329
// XorL of a register with a loaded long (LoadL), APX NDD form (UseAPX on).
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    Address  mem  = $src2$$Address;
    __ exorq(Rdst, Rsrc, mem, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14344
// Long read-modify-write: StoreL of (LoadL dst) ^ src back to the same
// memory operand, emitted as a single xorq on memory.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Address  mem  = $dst$$Address;
    Register Rsrc = $src$$Register;
    __ xorq(mem, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
14359
// Long read-modify-write with an immL32 immediate: StoreL of
// (LoadL dst) ^ src back to the same memory operand.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Address mem = $dst$$Address;
    __ xorq(mem, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14374
// CmpLTMask of two int registers: compare p with q, materialize the
// "less" condition into dst with setcc, then negate dst.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each listed instruction ends with "\n\t" so that the following one
  // starts on its own line in the printed listing.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14391
// CmpLTMask of dst against the constant zero (immI_0), in place:
// emitted as an arithmetic right shift of dst by 31.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);
  ins_cost(100);

  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarl(Rdst, 31);
  %}
  ins_pipe(ialu_reg);
%}
14404
// Conditional add built on CmpLTMask: p = ((p < q ? -1 : 0) & y) + (p - q).
// Implemented with a short branch; saving a register is preferred here over
// avoiding the branch.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);

  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Label skip_add;
    Register lhs    = $p$$Register;
    Register rhs    = $q$$Register;
    Register addend = $y$$Register;
    __ subl(lhs, rhs);
    // Skip the add when the subtraction reports greater-or-equal.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl(lhs, addend);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
14427
// AndI of a CmpLTMask with y, in place: y is kept when p < q and cleared
// otherwise. Implemented with a short branch; saving a register is preferred
// here over avoiding the branch.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);
  ins_cost(300);

  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Label keep_value;
    Register lhs   = $p$$Register;
    Register rhs   = $q$$Register;
    Register value = $y$$Register;
    __ cmpl(lhs, rhs);
    // When p < q the value in y is left untouched.
    __ jccb(Assembler::less, keep_value);
    __ xorl(value, value);
    __ bind(keep_value);
  %}
  ins_pipe(pipe_cmplt);
%}
14452
14453
14454 //---------- FP Instructions------------------------------------------------
14455
// CmpF of two float registers into rFlagsRegU. This is the costly variant
// (ins_cost 500): ucomiss is followed by the emit_cmpfp_fixup sequence shown
// in the format string. Avoid when a cheaper rule applies.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));
  ins_cost(500);

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14474
// CmpF of two float registers into rFlagsRegUCF: a bare ucomiss.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));
  ins_cost(100);

  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14485
// CmpF of two float registers into rFlagsRegUCFE: a bare evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));
  ins_cost(100);

  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14496
// CmpF of a float register with a loaded float (LoadF) into rFlagsRegUCF:
// ucomiss with a memory operand.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);

  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    Address     mem = $src2$$Address;
    __ ucomiss(lhs, mem);
  %}
  ins_pipe(pipe_slow);
%}
14507
// Float compare into rFlagsRegUCFE with the second operand folded in as a
// memory load (LoadF), encoded with evucomxss.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14518
// Float compare into rFlagsRegUCF against an immF constant; the constant is
// addressed through the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14529
// Float compare into rFlagsRegUCFE against an immF constant taken from the
// constant table, encoded with evucomxss.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14540
// Double compare into rFlagsRegU. Really expensive, avoid: after the ucomisd
// a fixup is emitted which, per the format string, is skipped when parity is
// clear and otherwise rewrites the saved flags ("saw NaN, set CF").
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14559
// Double register/register compare into rFlagsRegUCF: a bare ucomisd with no
// fixup sequence after it.
// The format string no longer carries the stray " test" suffix, so the
// printed form matches the float counterpart cmpF_cc_regCF.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14570
// Double register/register compare into rFlagsRegUCFE, encoded with
// evucomxsd.
// The format string no longer carries the stray " test" suffix, so the
// printed form matches the float counterpart cmpF_cc_regCFE.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14581
// Double compare into rFlagsRegUCF with the second operand folded in as a
// memory load (LoadD).
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14592
// Double compare into rFlagsRegUCFE with the second operand folded in as a
// memory load (LoadD), encoded with evucomxsd.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxsd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14603
// Double compare into rFlagsRegUCF against an immD constant; the constant is
// addressed through the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14613
// Double compare into rFlagsRegUCFE against an immD constant taken from the
// constant table, encoded with evucomxsd.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14624
// Three-way float compare (CmpF3) of two registers into -1, 0 or 1.
// Per the format string, dst is preset to -1 and kept on parity (unordered)
// or below; otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14645
// Three-way float compare (CmpF3) into -1, 0 or 1 with the second operand
// loaded from memory. The format string shows the sequence: dst is preset to
// -1 and kept on parity or below, else setne/movzbl produce 0 or 1.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14666
// Three-way float compare (CmpF3) into -1, 0 or 1 against an immF constant
// read from the constant table. The format string shows the sequence: dst is
// preset to -1 and kept on parity or below, else setne/movzbl produce 0 or 1.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14686
// Three-way double compare (CmpD3) of two registers into -1, 0 or 1.
// Per the format string, dst is preset to -1 and kept on parity (unordered)
// or below; otherwise setne/movzbl leave 0 or 1 in dst.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14707
// Three-way double compare (CmpD3) into -1, 0 or 1 with the second operand
// loaded from memory. The format string shows the sequence: dst is preset to
// -1 and kept on parity or below, else setne/movzbl produce 0 or 1.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14728
// Three-way double compare (CmpD3) into -1, 0 or 1 against an immD constant
// read from the constant table. The format string shows the sequence: dst is
// preset to -1 and kept on parity or below, else setne/movzbl produce 0 or 1.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14748
14749 //----------Arithmetic Conversion Instructions---------------------------------
14750
// ConvF2D, register to register, via cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtss2sd(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14761
// ConvF2D with the float operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtss2sd(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14773
// ConvD2F, register to register, via cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtsd2ss(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14784
// ConvD2F with the double operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsd2ss(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14796
// ConvF2I for targets without AVX10.2: delegates to the convertF2I macro
// with (T_INT, T_FLOAT) and kills the flags.
// XXX do mem variants
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14809
// ConvF2I for AVX10.2 targets: a single evcvttss2sisl. Unlike the
// non-AVX10.2 rule, no flags register is declared as killed.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ evcvttss2sisl(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14820
// ConvF2I for AVX10.2 targets with the float operand folded in as a memory
// load.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttss2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14831
// ConvF2L for targets without AVX10.2: delegates to the convertF2I macro
// with (T_LONG, T_FLOAT) and kills the flags.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14843
// ConvF2L for AVX10.2 targets: a single evcvttss2sisq, with no flags
// register declared as killed.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ evcvttss2sisq(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14854
// ConvF2L for AVX10.2 targets with the float operand folded in as a memory
// load.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttss2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14865
// ConvD2I for targets without AVX10.2: delegates to the convertF2I macro
// with (T_INT, T_DOUBLE) and kills the flags.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14877
// ConvD2I for AVX10.2 targets: a single evcvttsd2sisl, with no flags
// register declared as killed.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ evcvttsd2sisl(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14888
// ConvD2I for AVX10.2 targets with the double operand folded in as a memory
// load.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttsd2sisl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14899
// ConvD2L for targets without AVX10.2: delegates to the convertF2I macro
// with (T_LONG, T_DOUBLE) and kills the flags.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, result, value);
  %}
  ins_pipe(pipe_slow);
%}
14911
// ConvD2L for AVX10.2 targets: a single evcvttsd2sisq, with no flags
// register declared as killed.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister value  = $src$$XMMRegister;
    __ evcvttsd2sisq(result, value);
  %}
  ins_pipe(pipe_slow);
%}
14922
// ConvD2L for AVX10.2 targets with the double operand folded in as a memory
// load.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ evcvttsd2sisq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14933
// RoundD: double to long via the round_double macro. dst, rtmp and rcx are
// all declared TEMP and the flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister value    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_double(result, value, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14944
// RoundF: float to int via the round_float macro. dst, rtmp and rcx are all
// declared TEMP and the flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister value    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    scratch2 = $rcx$$Register;
    __ round_float(result, value, scratch, scratch2);
  %}
  ins_pipe(pipe_slow);
%}
14955
// ConvI2F from a general register via cvtsi2ssl; selected when UseXmmI2F is
// off. With AVX enabled dst is zeroed first (not shown in the format).
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst; confirm.
      __ pxor(result, result);
    }
    __ cvtsi2ssl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14970
// ConvI2F with the int operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14982
// ConvI2D from a general register via cvtsi2sdl; selected when UseXmmI2D is
// off. With AVX enabled dst is zeroed first (not shown in the format).
instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst; confirm.
      __ pxor(result, result);
    }
    __ cvtsi2sdl(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14997
// ConvI2D with the int operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15009
// ConvI2F selected when UseXmmI2F is on: the int is first moved into the XMM
// register with movdl and then converted in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2ps(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15023
// ConvI2D selected when UseXmmI2D is on: the int is first moved into the XMM
// register with movdl and then converted in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15037
// ConvL2F from a general register via cvtsi2ssq. With AVX enabled dst is
// zeroed first (not shown in the format).
instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst; confirm.
      __ pxor(result, result);
    }
    __ cvtsi2ssq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15051
// ConvL2F with the long operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15063
// ConvL2D from a general register via cvtsi2sdq. With AVX enabled dst is
// zeroed first (not shown in the format).
instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst; confirm.
      __ pxor(result, result);
    }
    __ cvtsi2sdq(result, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15077
// ConvL2D with the long operand folded in as a memory load.
// Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15089
// ConvI2L (sign-extending int to long) via movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
15101
// Zero-extend convert int to long: matches (AndL (ConvI2L src) mask) where
// mask is an immL_32bits operand, and emits a 32-bit movl.
// The single-line format string no longer ends with a dangling "\n\t".
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // Nothing is emitted when dst and src were allocated the same register.
    // NOTE(review): presumably relies on the upper 32 bits of an int
    // register already being zero; confirm.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15115
// Zero-extend convert int to long with the int folded in as a memory load:
// matches (AndL (ConvI2L (LoadI src)) mask) where mask is an immL_32bits
// operand, and emits a 32-bit movl from memory.
// The single-line format string no longer ends with a dangling "\n\t".
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15127
// AndL of a long with an immL_32bits mask, implemented as a 32-bit movl
// (described by the format string as a zero-extend).
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg);
%}
15138
// ConvL2I (long to int) as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg);
%}
15149
15150
// MoveF2I from a float stack slot into an int register: a 32-bit load from
// rsp plus the slot displacement.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15162
// MoveI2F from an int stack slot into a float register: movflt from rsp plus
// the slot displacement.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15174
// MoveD2L from a double stack slot into a long register: a 64-bit load from
// rsp plus the slot displacement.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15186
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is off. The format names movlpd while the encoding
// calls movdbl.
// NOTE(review): movdbl presumably picks the instruction from that same flag;
// confirm.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15199
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is on. The format names movsd while the encoding
// calls movdbl.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15212
15213
// MoveF2I from a float register into an int stack slot: movflt to rsp plus
// the slot displacement.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15225
// MoveI2F from an int register into a float stack slot: a 32-bit store to
// rsp plus the slot displacement.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15237
// MoveD2L from a double register into a long stack slot: movdbl to rsp plus
// the slot displacement.
// The format tag now names this rule (MoveD2L_reg_stack); it previously read
// "MoveL2D_reg_stack", which is the tag of the opposite-direction rule.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15249
// MoveL2D from a long register into a double stack slot: a 64-bit store to
// rsp plus the slot displacement.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15261
// MoveF2I directly from an XMM register to a general register via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe(pipe_slow);
%}
15272
// MoveD2L directly from an XMM register to a general register via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe(pipe_slow);
%}
15283
// MoveI2F directly from a general register to an XMM register via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe(pipe_slow);
%}
15294
// MoveL2D directly from a general register to an XMM register via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe(pipe_slow);
%}
15305
15306
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Selected when the ClearArray node is neither is_large() nor
// word_copy_only() and UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are
// consumed and clobbered, tmp is an XMM temporary, and the flags are killed.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            !((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xtmp     = $tmp$$XMMRegister;
    // The two flags mirror this rule's predicate: not large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, xtmp, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15367
// Small non-constant length ClearArray for non-AVX512 targets, word-copy-only
// flavour: selected when the node is not is_large(), is word_copy_only(), and
// UseAVX <= 2. The format has no "rep stosb" branch. cnt (rcx), base (rdi)
// and val (rax) are consumed and clobbered; tmp is an XMM temporary.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xtmp     = $tmp$$XMMRegister;
    // The two flags mirror this rule's predicate: not large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, xtmp, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15423
// Small non-constant length ClearArray for AVX512 targets.
// Selected when the node is neither is_large() nor word_copy_only() and
// UseAVX > 2. Besides the XMM temporary it reserves an opmask register
// (ktmp) that is handed to clear_mem.
// NOTE(review): the format text prints "xorq rax, rax" and hard-codes
// ymm0/xmm0, although rax is the incoming val operand here; it looks stale
// relative to the encoding. Confirm against clear_mem before relying on it.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            !((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xtmp     = $tmp$$XMMRegister;
    KRegister   kmask    = $ktmp$$KRegister;
    // The two flags mirror this rule's predicate: not large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, xtmp, false, false, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15484
// Small non-constant length ClearArray for AVX512 targets, word-copy-only
// flavour: selected when the node is not is_large(), is word_copy_only(), and
// UseAVX > 2. Reserves an opmask register (ktmp) that is handed to clear_mem.
// NOTE(review): the format text still has a UseFastStosb branch and prints
// "xorq rax, rax", unlike the non-EVEX word-copy rule; it looks stale
// relative to the encoding. Confirm against clear_mem before relying on it.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xtmp     = $tmp$$XMMRegister;
    KRegister   kmask    = $ktmp$$KRegister;
    // The two flags mirror this rule's predicate: not large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, xtmp, false, true, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15544
// Large non-constant length ClearArray for non-AVX512 targets.
// Selected when the node is is_large(), not word_copy_only(), and
// UseAVX <= 2. The format omits the short-length loop printed by the small
// variants. cnt (rcx), base (rdi) and val (rax) are consumed and clobbered;
// tmp is an XMM temporary.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() &&
            !((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xtmp     = $tmp$$XMMRegister;
    // The two flags mirror this rule's predicate: large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, xtmp, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15594
// Large non-constant length ClearArray for non-AVX512 targets, word-copy-only
// flavour.
//
// Selected for ClearArray nodes for which both is_large() and
// word_copy_only() are true and UseAVX <= 2. The count (rcx), the base (rdi)
// and the fill value (rax) are pinned and consumed (USE_KILL); tmp is the
// only vector scratch register and the flags are killed.
//
// The format template is only a listing for disassembly output and has no
// byte-store (stosb) shape: it shows either the XMM loop or rep stosq,
// depending on UseXMMForObjInit. The code that is really emitted comes from
// clear_mem.
//
// NOTE(review): the XMM listing still shows stores through (rax) although rax
// carries the fill value here -- presumably a leftover from an older listing;
// confirm against clear_mem before relying on it.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq $tmp,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Both boolean arguments are true, matching the is_large() and
    // word_copy_only() terms of the predicate.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15640
// Large non-constant length ClearArray for AVX512 targets.
//
// Chosen for ClearArray nodes with is_large() set, word_copy_only() clear and
// UseAVX > 2. Compared with the UseAVX <= 2 rule it additionally reserves a
// kReg temp and hands it to clear_mem. Count, base and value live in rcx, rdi
// and rax respectively and are all consumed.
//
// NOTE(review): the listing below prints a zeroing of rax and hard-coded
// ymm0/xmm0 even though the value arrives in rax and the vector temp is
// allocated by the register allocator -- it looks stale; verify against
// clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() &&
            !((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Name the allocated registers once, then make the single stub call.
    const Register    base_reg  = $base$$Register;
    const Register    count_reg = $cnt$$Register;
    const Register    value_reg = $val$$Register;
    const XMMRegister vec_tmp   = $tmp$$XMMRegister;
    const KRegister   mask_tmp  = $ktmp$$KRegister;
    __ clear_mem(base_reg, count_reg, value_reg, vec_tmp, true, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15691
// Large non-constant length ClearArray for AVX512 targets, word-copy-only
// flavour.
//
// Chosen for ClearArray nodes with both is_large() and word_copy_only() set
// and UseAVX > 2. A kReg temp is reserved and handed to clear_mem. Count,
// base and value live in rcx, rdi and rax respectively and are all consumed.
//
// NOTE(review): the listing below prints a zeroing of rax, hard-coded
// ymm0/xmm0 and a UseFastStosb shape; the UseAVX <= 2 word-copy rule has no
// stosb shape in its listing, so this one looks stale -- verify against
// clear_mem.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() &&
            ((ClearArrayNode*)n)->word_copy_only() &&
            (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Name the allocated registers once, then make the single stub call.
    const Register    base_reg  = $base$$Register;
    const Register    count_reg = $cnt$$Register;
    const Register    value_reg = $val$$Register;
    const XMMRegister vec_tmp   = $tmp$$XMMRegister;
    const KRegister   mask_tmp  = $ktmp$$KRegister;
    __ clear_mem(base_reg, count_reg, value_reg, vec_tmp, true, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15741
// Small constant length ClearArray for AVX512 targets.
//
// The count is an immediate (immL) rather than a register. The rule applies
// to ClearArray nodes that are neither is_large() nor word_copy_only(), and
// only when MaxVectorSize >= 32 and VM_Version::supports_avx512vl() holds.
// The value is pinned to rax and consumed; an XMM temp, a kReg temp and the
// flags are scratch. ins_cost is set to 100.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Constant-count overload: the immediate is forwarded as is.
    const Register    base_reg  = $base$$Register;
    const Register    value_reg = $val$$Register;
    const XMMRegister vec_tmp   = $tmp$$XMMRegister;
    const KRegister   mask_tmp  = $ktmp$$KRegister;
    __ clear_mem(base_reg, $cnt$$constant, value_reg, vec_tmp, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15756
// StrComp for the StrIntrinsicNode::LL encoding, used when
// VM_Version::supports_avx512vlbw() is false. str1/cnt1 are pinned to rdi/rcx,
// str2/cnt2 to rsi/rdx, and the result is produced in rax. All four inputs
// are consumed; one vector temp and the flags are scratch. No K register is
// supplied to the stub (knoreg).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15772
// StrComp for the StrIntrinsicNode::LL encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register assignment as the
// non-EVEX rule (rdi/rcx, rsi/rdx, result in rax), plus a kReg temp that is
// passed on to the stub.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    const KRegister   mask_tmp       = $ktmp$$KRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15788
// StrComp for the StrIntrinsicNode::UU encoding, used when
// VM_Version::supports_avx512vlbw() is false. str1/cnt1 are pinned to rdi/rcx,
// str2/cnt2 to rsi/rdx, and the result is produced in rax. All four inputs
// are consumed; one vector temp and the flags are scratch. No K register is
// supplied to the stub (knoreg).
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15804
// StrComp for the StrIntrinsicNode::UU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register assignment as the
// non-EVEX rule (rdi/rcx, rsi/rdx, result in rax), plus a kReg temp that is
// passed on to the stub.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    const KRegister   mask_tmp       = $ktmp$$KRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15820
// StrComp for the StrIntrinsicNode::LU encoding, used when
// VM_Version::supports_avx512vlbw() is false. str1/cnt1 are pinned to rdi/rcx,
// str2/cnt2 to rsi/rdx, and the result is produced in rax. All four inputs
// are consumed; one vector temp and the flags are scratch. No K register is
// supplied to the stub (knoreg).
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15836
// StrComp for the StrIntrinsicNode::LU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register assignment as the
// non-EVEX rule (rdi/rcx, rsi/rdx, result in rax), plus a kReg temp that is
// passed on to the stub.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    const KRegister   mask_tmp       = $ktmp$$KRegister;
    __ string_compare(first_reg, second_reg, first_cnt_reg, second_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15852
// StrComp for the StrIntrinsicNode::UL encoding, used when
// VM_Version::supports_avx512vlbw() is false. Unlike the other StrComp rules,
// str1/cnt1 are pinned to rsi/rdx and str2/cnt2 to rdi/rcx, and the stub is
// called with the two strings (and their counts) in swapped order. The result
// is produced in rax. No K register is supplied (knoreg).
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    // Deliberately swapped: the second string goes first.
    __ string_compare(second_reg, first_reg, second_cnt_reg, first_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15868
// StrComp for the StrIntrinsicNode::UL encoding, used when
// VM_Version::supports_avx512vlbw() is true. str1/cnt1 are pinned to rsi/rdx
// and str2/cnt2 to rdi/rcx, and the stub is called with the two strings (and
// their counts) in swapped order. A kReg temp is reserved and passed on.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    first_reg      = $str1$$Register;
    const Register    second_reg     = $str2$$Register;
    const Register    first_cnt_reg  = $cnt1$$Register;
    const Register    second_cnt_reg = $cnt2$$Register;
    const Register    result_reg     = $result$$Register;
    const XMMRegister vec_tmp        = $tmp1$$XMMRegister;
    const KRegister   mask_tmp       = $ktmp$$KRegister;
    // Deliberately swapped: the second string goes first.
    __ string_compare(second_reg, first_reg, second_cnt_reg, first_cnt_reg,
                      result_reg, vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15884
// Fast substring search where the substring length is a compile-time constant
// (StrIntrinsicNode::LL encoding, requires UseSSE42Intrinsics).
//
// The second count is the immediate int_cnt2; rax (cnt2) is not an input here
// and is only declared so that it can be killed, as is rcx (tmp). The result
// is produced in rbx.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_cnt  = (int)$int_cnt2$$constant;
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    if (substr_cnt < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        substr_cnt, result_reg, vec_tmp, scratch_reg,
                        StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          substr_cnt, result_reg, vec_tmp, scratch_reg,
                          StrIntrinsicNode::LL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15913
// Fast substring search where the substring length is a compile-time constant
// (StrIntrinsicNode::UU encoding, requires UseSSE42Intrinsics).
//
// The second count is the immediate int_cnt2; rax (cnt2) is not an input here
// and is only declared so that it can be killed, as is rcx (tmp). The result
// is produced in rbx.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_cnt  = (int)$int_cnt2$$constant;
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    if (substr_cnt < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        substr_cnt, result_reg, vec_tmp, scratch_reg,
                        StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          substr_cnt, result_reg, vec_tmp, scratch_reg,
                          StrIntrinsicNode::UU);
    }
  %}
  ins_pipe(pipe_slow);
%}
15942
// Fast substring search where the substring length is a compile-time constant
// (StrIntrinsicNode::UL encoding, requires UseSSE42Intrinsics).
//
// The second count is the immediate int_cnt2; rax (cnt2) is not an input here
// and is only declared so that it can be killed, as is rcx (tmp). The result
// is produced in rbx.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         substr_cnt  = (int)$int_cnt2$$constant;
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    if (substr_cnt < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        substr_cnt, result_reg, vec_tmp, scratch_reg,
                        StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          substr_cnt, result_reg, vec_tmp, scratch_reg,
                          StrIntrinsicNode::UL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15971
// Substring search with a substring length held in a register
// (StrIntrinsicNode::LL encoding, requires UseSSE42Intrinsics).
// Inputs are pinned to rdi/rdx (str1/cnt1) and rsi/rax (str2/cnt2) and are all
// consumed; rcx and the flags are killed; the result is produced in rbx.
// The stub receives -1 in the slot where the constant-length rules pass the
// immediate count.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, scratch_reg,
                      StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_slow);
%}
15988
// Substring search with a substring length held in a register
// (StrIntrinsicNode::UU encoding, requires UseSSE42Intrinsics).
// Inputs are pinned to rdi/rdx (str1/cnt1) and rsi/rax (str2/cnt2) and are all
// consumed; rcx and the flags are killed; the result is produced in rbx.
// The stub receives -1 in the slot where the constant-length rules pass the
// immediate count.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, scratch_reg,
                      StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_slow);
%}
16005
// Substring search with a substring length held in a register
// (StrIntrinsicNode::UL encoding, requires UseSSE42Intrinsics).
// Inputs are pinned to rdi/rdx (str1/cnt1) and rsi/rax (str2/cnt2) and are all
// consumed; rcx and the flags are killed; the result is produced in rbx.
// The stub receives -1 in the slot where the constant-length rules pass the
// immediate count.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    const Register    str1_reg    = $str1$$Register;
    const Register    str2_reg    = $str2$$Register;
    const Register    cnt1_reg    = $cnt1$$Register;
    const Register    cnt2_reg    = $cnt2$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_tmp     = $tmp_vec$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, scratch_reg,
                      StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_slow);
%}
16022
// StrIndexOfChar for the StrIntrinsicNode::U encoding (requires
// UseSSE42Intrinsics). The string pointer, its count and the character are
// pinned to rdi, rdx and rax and are all consumed; three vector temps, rcx
// and the flags are scratch; the result is produced in rbx.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    const Register    str_reg     = $str1$$Register;
    const Register    cnt_reg     = $cnt1$$Register;
    const Register    ch_reg      = $ch$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_a       = $tmp_vec1$$XMMRegister;
    const XMMRegister vec_b       = $tmp_vec2$$XMMRegister;
    const XMMRegister vec_c       = $tmp_vec3$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    __ string_indexof_char(str_reg, cnt_reg, ch_reg, result_reg,
                           vec_a, vec_b, vec_c, scratch_reg);
  %}
  ins_pipe(pipe_slow);
%}
16036
// StrIndexOfChar for the StrIntrinsicNode::L encoding (requires
// UseSSE42Intrinsics). The string pointer, its count and the character are
// pinned to rdi, rdx and rax and are all consumed; three vector temps, rcx
// and the flags are scratch; the result is produced in rbx.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    const Register    str_reg     = $str1$$Register;
    const Register    cnt_reg     = $cnt1$$Register;
    const Register    ch_reg      = $ch$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_a       = $tmp_vec1$$XMMRegister;
    const XMMRegister vec_b       = $tmp_vec2$$XMMRegister;
    const XMMRegister vec_c       = $tmp_vec3$$XMMRegister;
    const Register    scratch_reg = $tmp$$Register;
    __ stringL_indexof_char(str_reg, cnt_reg, ch_reg, result_reg,
                            vec_a, vec_b, vec_c, scratch_reg);
  %}
  ins_pipe(pipe_slow);
%}
16050
// Fast string equals, used when VM_Version::supports_avx512vlbw() is false.
// The two strings are pinned to rdi/rsi and the count to rcx; all three are
// consumed. Two vector temps are reserved, rbx and the flags are killed, and
// the result is produced in rax. arrays_equals is called with false as its
// first argument and without a K register (knoreg).
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    lhs_reg     = $str1$$Register;
    const Register    rhs_reg     = $str2$$Register;
    const Register    cnt_reg     = $cnt$$Register;
    const Register    result_reg  = $result$$Register;
    const Register    scratch_reg = $tmp3$$Register;
    const XMMRegister vec_a       = $tmp1$$XMMRegister;
    const XMMRegister vec_b       = $tmp2$$XMMRegister;
    __ arrays_equals(false, lhs_reg, rhs_reg, cnt_reg, result_reg, scratch_reg,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16067
// Fast string equals, used when VM_Version::supports_avx512vlbw() is true.
// Same register assignment as the non-EVEX rule (rdi/rsi strings, rcx count,
// rbx killed, result in rax), plus a kReg temp that is passed on to
// arrays_equals.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    lhs_reg     = $str1$$Register;
    const Register    rhs_reg     = $str2$$Register;
    const Register    cnt_reg     = $cnt$$Register;
    const Register    result_reg  = $result$$Register;
    const Register    scratch_reg = $tmp3$$Register;
    const XMMRegister vec_a       = $tmp1$$XMMRegister;
    const XMMRegister vec_b       = $tmp2$$XMMRegister;
    const KRegister   mask_tmp    = $ktmp$$KRegister;
    __ arrays_equals(false, lhs_reg, rhs_reg, cnt_reg, result_reg, scratch_reg,
                     vec_a, vec_b, false /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
16083
// Fast array equals for AryEq nodes with the StrIntrinsicNode::LL encoding,
// used when VM_Version::supports_avx512vlbw() is false. The arrays are pinned
// to rdi/rsi and consumed; rcx, rbx and the flags are killed; two vector
// temps are reserved; the result is produced in rax. arrays_equals is called
// with true as its first argument, false for the char flag and no K register
// (knoreg).
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs_reg    = $ary1$$Register;
    const Register    rhs_reg    = $ary2$$Register;
    const Register    result_reg = $result$$Register;
    const Register    gpr_tmp_a  = $tmp3$$Register;
    const Register    gpr_tmp_b  = $tmp4$$Register;
    const XMMRegister vec_a      = $tmp1$$XMMRegister;
    const XMMRegister vec_b      = $tmp2$$XMMRegister;
    __ arrays_equals(true, lhs_reg, rhs_reg, gpr_tmp_a, result_reg, gpr_tmp_b,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16100
// Fast array equals for AryEq nodes with the StrIntrinsicNode::LL encoding,
// used when VM_Version::supports_avx512vlbw() is true. Same register
// assignment as the non-EVEX rule (rdi/rsi arrays, rcx and rbx killed, result
// in rax), plus a kReg temp that is passed on to arrays_equals.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs_reg    = $ary1$$Register;
    const Register    rhs_reg    = $ary2$$Register;
    const Register    result_reg = $result$$Register;
    const Register    gpr_tmp_a  = $tmp3$$Register;
    const Register    gpr_tmp_b  = $tmp4$$Register;
    const XMMRegister vec_a      = $tmp1$$XMMRegister;
    const XMMRegister vec_b      = $tmp2$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ arrays_equals(true, lhs_reg, rhs_reg, gpr_tmp_a, result_reg, gpr_tmp_b,
                     vec_a, vec_b, false /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
16116
// Fast array equals for AryEq nodes with the StrIntrinsicNode::UU encoding,
// used when VM_Version::supports_avx512vlbw() is false. The arrays are pinned
// to rdi/rsi and consumed; rcx, rbx and the flags are killed; two vector
// temps are reserved; the result is produced in rax. arrays_equals is called
// with true as its first argument, true for the char flag and no K register
// (knoreg).
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs_reg    = $ary1$$Register;
    const Register    rhs_reg    = $ary2$$Register;
    const Register    result_reg = $result$$Register;
    const Register    gpr_tmp_a  = $tmp3$$Register;
    const Register    gpr_tmp_b  = $tmp4$$Register;
    const XMMRegister vec_a      = $tmp1$$XMMRegister;
    const XMMRegister vec_b      = $tmp2$$XMMRegister;
    __ arrays_equals(true, lhs_reg, rhs_reg, gpr_tmp_a, result_reg, gpr_tmp_b,
                     vec_a, vec_b, true /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16132
// Fast array equals for AryEq nodes with the StrIntrinsicNode::UU encoding,
// used when VM_Version::supports_avx512vlbw() is true. Same register
// assignment as the non-EVEX rule (rdi/rsi arrays, rcx and rbx killed, result
// in rax), plus a kReg temp that is passed on to arrays_equals.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs_reg    = $ary1$$Register;
    const Register    rhs_reg    = $ary2$$Register;
    const Register    result_reg = $result$$Register;
    const Register    gpr_tmp_a  = $tmp3$$Register;
    const Register    gpr_tmp_b  = $tmp4$$Register;
    const XMMRegister vec_a      = $tmp1$$XMMRegister;
    const XMMRegister vec_b      = $tmp2$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ arrays_equals(true, lhs_reg, rhs_reg, gpr_tmp_a, result_reg, gpr_tmp_b,
                     vec_a, vec_b, true /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
16148
// VectorizedHashCode, enabled when UseAVX >= 2.
// The array pointer (rdi) and its count (rdx) are consumed. result (rbx) is
// both an input of the matched node and the register that receives the
// outcome. basic_type is an immediate that is cast to BasicType and forwarded
// as the last argument. Thirteen vector temps and three general-purpose temps
// are reserved, and the flags are killed.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    const BasicType elem_type  = (BasicType)$basic_type$$constant;
    const Register  ary_reg    = $ary1$$Register;
    const Register  cnt_reg    = $cnt1$$Register;
    const Register  result_reg = $result$$Register;
    // General-purpose temps first, then the vector temps in declaration order.
    __ arrays_hashcode(ary_reg, cnt_reg, result_reg,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister,
                       $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister,
                       $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister,
                       $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister,
                       $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister,
                       elem_type);
  %}
  ins_pipe(pipe_slow);
%}
16174
// CountPositives, used unless both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() hold. The array pointer (rsi) and the length
// (rcx) are consumed; two vector temps are reserved; rbx and the flags are
// killed; the result is produced in rax. No K registers are supplied to the
// stub (knoreg for both mask arguments).
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16190
// CountPositives, used when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() hold. The array pointer (rsi) and the length
// (rcx) are consumed; two vector temps and two kReg temps are reserved; rbx
// and the flags are killed; the result is produced in rax.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16206
// Fast char[] to byte[] compression (StrCompressedCopy), used unless both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
// Source, destination and length are pinned to rsi, rdi and rdx and are all
// consumed; four vector temps are reserved; rcx and the flags are killed; the
// result is produced in rax. No K registers are supplied (knoreg twice).
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    src_reg     = $src$$Register;
    const Register    dst_reg     = $dst$$Register;
    const Register    len_reg     = $len$$Register;
    const Register    scratch_reg = $tmp5$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_a       = $tmp1$$XMMRegister;
    const XMMRegister vec_b       = $tmp2$$XMMRegister;
    const XMMRegister vec_c       = $tmp3$$XMMRegister;
    const XMMRegister vec_d       = $tmp4$$XMMRegister;
    __ char_array_compress(src_reg, dst_reg, len_reg,
                           vec_a, vec_b, vec_c, vec_d,
                           scratch_reg, result_reg,
                           knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
16224
// Fast char[] to byte[] compression (StrCompressedCopy), used when both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
// Same register assignment as the non-EVEX rule (rsi/rdi/rdx inputs, rcx
// killed, result in rax), plus two kReg temps that are passed on to
// char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    src_reg     = $src$$Register;
    const Register    dst_reg     = $dst$$Register;
    const Register    len_reg     = $len$$Register;
    const Register    scratch_reg = $tmp5$$Register;
    const Register    result_reg  = $result$$Register;
    const XMMRegister vec_a       = $tmp1$$XMMRegister;
    const XMMRegister vec_b       = $tmp2$$XMMRegister;
    const XMMRegister vec_c       = $tmp3$$XMMRegister;
    const XMMRegister vec_d       = $tmp4$$XMMRegister;
    const KRegister   mask_a      = $ktmp1$$KRegister;
    const KRegister   mask_b      = $ktmp2$$KRegister;
    __ char_array_compress(src_reg, dst_reg, len_reg,
                           vec_a, vec_b, vec_c, vec_d,
                           scratch_reg, result_reg,
                           mask_a, mask_b);
  %}
  ins_pipe(pipe_slow);
%}
16241 // fast byte[] to char[] inflation
// Non-EVEX variant: selected when supports_avx512vlbw() or supports_bmi2()
// is false; knoreg is passed as the mask-register argument.
// The node's result is the placeholder operand 'dummy' (type Universe).
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  // src, dst and len are consumed and clobbered; the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16255
// EVEX variant of StrInflatedCopy (byte[] to char[] inflation): selected when
// both supports_avx512vlbw() and supports_bmi2() are true. A kReg temporary is
// allocated and passed to byte_array_inflate as its mask-register argument.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  // src, dst and len are consumed and clobbered; the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16269
16270 // encode char[] to byte[] in ISO_8859_1
// Selected for EncodeISOArray nodes whose is_ascii() is false; the final
// 'false' argument to MacroAssembler::encode_iso_array is the ascii flag.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  // src, dst and len are consumed and clobbered; tmp5 (RCX) and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
16286
16287 // encode char[] to byte[] in ASCII
// Selected for EncodeISOArray nodes whose is_ascii() is true. Shares
// MacroAssembler::encode_iso_array with the ISO variant, passing 'true'
// as the final (ascii) argument.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  // src, dst and len are consumed and clobbered; tmp5 (RCX) and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
16303
16304 //----------Overflow Math Instructions-----------------------------------------
16305
// Int add overflow check, register + register. The flags are produced by
// performing the addl itself, so $op1 receives the sum and is declared
// USE_KILL.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16318
// Int add overflow check, register + immediate. The addl overwrites $op1
// (USE_KILL) and defines the flags.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16331
// Long add overflow check, register + register. The addq overwrites $op1
// (USE_KILL) and defines the flags.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16343
// Long add overflow check, register + 32-bit immediate (immL32). The addq
// overwrites $op1 (USE_KILL) and defines the flags.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16355
// Int subtract overflow check, register - register. Implemented with cmpl,
// so no operand is declared as killed.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16366
// Int subtract overflow check, register - immediate. Implemented with cmpl,
// so no operand is declared as killed.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16377
// Long subtract overflow check, register - register. Implemented with cmpq,
// so no operand is declared as killed.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16388
// Long subtract overflow check, register - 32-bit immediate (immL32).
// Implemented with cmpq, so no operand is declared as killed.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16399
// Int negate overflow check: matches OverflowSubI whose left operand is the
// constant zero and emits negl, which overwrites $op2 (USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16411
// Long negate overflow check: matches OverflowSubL whose left operand is the
// constant zero and emits negq, which overwrites $op2 (USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16423
// Int multiply overflow check, register * register. The two-operand imull
// writes the product into $op1, hence USE_KILL.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16435
// Int multiply overflow check, register * immediate. The three-operand imull
// writes the product into the TEMP register $tmp, so $op1 is only read.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16447
// Long multiply overflow check, register * register. The two-operand imulq
// writes the product into $op1, hence USE_KILL.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16459
// Long multiply overflow check, register * 32-bit immediate (immL32). The
// three-operand imulq writes the product into the TEMP register $tmp, so
// $op1 is only read.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16471
16472
16473 //----------Control Flow Instructions------------------------------------------
16474 // Signed compare Instructions
16475
16476 // XXX more variants!!
// Signed int compare, register vs register. Besides matching CmpI directly,
// this rule is instantiated by the expand blocks of the MinI/MaxI rules in
// this file, which is why its effects are spelled out explicitly.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16488
// Signed int compare, register vs immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16499
// Signed int compare, register vs memory: folds the LoadI into the cmpl's
// memory operand.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16511
// Int compare against constant zero, emitted as testl of the register with
// itself instead of a cmpl with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16522
// (src & con) compared against zero: the AndI is folded into a single testl
// with an immediate mask, so no register is written.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16533
// (src1 & src2) compared against zero: the AndI is folded into a single
// register-register testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16544
// (src & LoadI(mem)) compared against zero: both the AndI and the load are
// folded into a single testl with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16555
16556 // Unsigned compare Instructions; really, same as signed except they
16557 // produce an rFlagsRegU instead of rFlagsReg.
// Register vs register form: the same cmpl as the signed rule, with the
// result typed as rFlagsRegU.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16568
// Unsigned int compare, register vs immediate; produces rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16579
// Unsigned int compare, register vs memory: folds the LoadI into the cmpl's
// memory operand; produces rFlagsRegU.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16591
// Unsigned int compare against constant zero, emitted as testl of the
// register with itself; produces rFlagsRegU.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16602
// Pointer compare, register vs register: a 64-bit cmpq whose result is typed
// as unsigned flags (rFlagsRegU).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16613
// Pointer compare, register vs memory: folds the LoadP into the cmpq's memory
// operand. Only applies when the load carries no GC barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  // n->in(2) is the LoadP being folded into the compare.
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16626
16627 // XXX this is generalized by compP_rReg_mem???
16628 // Compare raw pointer (used in out-of-heap check).
16629 // Only works because non-oop pointers must be raw pointers
16630 // and raw pointers have no anti-dependencies.
// The predicate restricts this rule to loads whose address input
// (n->in(2)->in(2)) has a raw-pointer type and which carry no GC barrier
// data. Unlike compP_rReg_mem it declares no ins_cost.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16643
16644 // This will generate a signed flags result. This should be OK since
16645 // any compare to a zero should be eq/neq.
// Pointer null test on a register, emitted as testq of the register with
// itself.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16656
16657 // This will generate a signed flags result. This should be OK since
16658 // any compare to a zero should be eq/neq.
// Pointer null test directly on memory. Used when compressed oops are off or
// the compressed-oops base is non-null, and only for loads without GC
// barrier data; the complementary case is handled by testP_mem_reg0.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // NOTE(review): the format string shows a 64-bit all-ones mask, so the
    // 32-bit immediate is presumably sign-extended by testq - confirm against
    // Assembler::testq(Address, int32_t).
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16672
// Pointer null test on memory for the case UseCompressedOops with a null
// compressed-oops base. Compares the memory word against r12 instead of an
// immediate; per the format string this relies on R12_heapbase being zero
// in that configuration.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16685
// Compressed (narrow) pointer compare, register vs register: a 32-bit cmpl
// producing unsigned flags.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16694
// Compressed pointer compare, register vs memory: folds the LoadN into the
// cmpl's memory operand. Only applies when the load carries no GC barrier
// data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16706
// Compressed pointer compare, register vs narrow-oop constant. The constant
// is handed to cmp_narrow_oop as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16716
// Compressed pointer compare, narrow-oop constant vs memory: folds the LoadN
// into cmp_narrow_oop's address operand. Only applies when the load carries
// no GC barrier data.
// NOTE(review): the match tree has the constant as the left operand while the
// emitted compare has memory first; presumably harmless if narrow-oop
// compares are only tested for eq/ne - confirm.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16728
// Compressed klass pointer compare, register vs narrow-klass constant. The
// constant is handed to cmp_narrow_klass as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16738
// Compressed klass pointer compare, narrow-klass constant vs memory: folds
// the LoadNKlass into cmp_narrow_klass's address operand. Disabled when
// UseCompactObjectHeaders is set.
// NOTE(review): presumably because the narrow klass is then not a plain
// 32-bit field that can be compared in place - confirm.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16750
// Compressed pointer null test on a register, emitted as testl of the
// register with itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16758
// Compressed pointer null test directly on memory: matches
// CmpN (LoadN mem) zero when the compressed-oops base is non-null and the
// load carries no GC barrier data. The null-base case is handled by
// testN_mem_reg0.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    // TEST against an all-ones mask sets ZF exactly when the 32-bit word is
    // zero, which is what a compare against the null narrow oop requires and
    // what the format string advertises. A CMP against -1 would instead
    // report "equal" only for the value 0xFFFFFFFF.
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16772
// Compressed pointer null test on memory for the case where the
// compressed-oops base is null. Compares the 32-bit memory word against r12
// instead of an immediate; per the format string this relies on R12_heapbase
// being zero in that configuration.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16785
16786 // Yanked all unsigned pointer compare operations.
16787 // Pointer compares are done with CmpP which is already unsigned.
16788
// Signed long compare, register vs register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16799
// Signed long compare, register vs 32-bit immediate (immL32).
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16810
// Signed long compare, register vs memory: folds the LoadL into the cmpq's
// memory operand.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16821
// Long compare against constant zero, emitted as testq of the register with
// itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16832
// (src & con) compared against zero for longs: the AndL is folded into a
// single testq with a 32-bit immediate mask (immL32).
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16843
// (src1 & src2) compared against zero for longs: the AndL is folded into a
// single register-register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16854
// (src & LoadL(mem)) compared against zero: both the AndL and the load are
// folded into a single testq with a memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16865
// Same as testL_reg_mem, but the register operand is a pointer reinterpreted
// as a long (CastP2X). The cast is absorbed by the match, so the pointer
// register is used directly in the testq.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16876
16877 // Manifest a CmpU result in an integer register. Very painful.
16878 // This is the test to avoid.
// Three-way unsigned int compare. $dst is preloaded with -1 and kept if
// src1 is below src2; otherwise setcc(notZero) overwrites it, giving 0 for
// equal operands and 1 otherwise (per the format note, setcc zero-extends).
// The flags are clobbered.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // movl does not modify the flags, so the compare result is still live
    // for the branch and the setcc below.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16900
16901 // Manifest a CmpL result in an integer register. Very painful.
16902 // This is the test to avoid.
// Three-way signed long compare. $dst is preloaded with -1 and kept if
// src1 is less than src2; otherwise setcc(notZero) overwrites it, giving 0
// for equal operands and 1 otherwise (per the format note, setcc
// zero-extends). The flags are clobbered.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not modify the flags, so the compare result is still live
    // for the branch and the setcc below.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16924
16925 // Manifest a CmpUL result in an integer register. Very painful.
16926 // This is the test to avoid.
// Three-way unsigned long compare. $dst is preloaded with -1 and kept if
// src1 is below src2; otherwise setcc(notZero) overwrites it, giving 0 for
// equal operands and 1 otherwise (per the format note, setcc zero-extends).
// The flags are clobbered.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not modify the flags, so the compare result is still live
    // for the branch and the setcc below.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16948
16949 // Unsigned long compare Instructions; really, same as signed long except they
16950 // produce an rFlagsRegU instead of rFlagsReg.
// Register vs register form: the same cmpq as the signed long rule, with the
// result typed as rFlagsRegU.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16961
// Unsigned long compare, register vs 32-bit immediate (immL32); produces
// rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16972
// Unsigned long compare, register vs memory: folds the LoadL into the cmpq's
// memory operand; produces rFlagsRegU.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16983
// Unsigned long compare against constant zero, emitted as testq of the
// register with itself; produces rFlagsRegU.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16994
// Compare a signed byte loaded from memory against an 8-bit immediate: the
// LoadB is folded into a single cmpb on the memory operand.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
17004
// (LoadUB(mem) & imm) compared against zero: folded into a single testb on
// the memory operand. The mask operand is typed immU7.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
17014
// (LoadB(mem) & imm) compared against zero: folded into a single testb on
// the memory operand. The mask operand is typed immI8.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
17024
17025 //----------Max and Min--------------------------------------------------------
17026 // Min Instructions
17027
// Helper for MinI without APX: has no match rule of its own and is only
// instantiated from minI_rReg's expand block. Replaces $dst with $src when
// the preceding compare found "greater".
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17039
// APX helper for MinI: has no match rule of its own and is only instantiated
// from minI_rReg_ndd's expand block. Uses the three-operand ecmovl, so $dst
// is a pure DEF and both sources are only read.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17051
// MinI without APX, two-operand form (dst = min(dst, src)). Expands into a
// signed compare followed by a conditional move on "greater".
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    // Flags register local to the expansion, linking the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
17064
// MinI with APX, three-operand form (dst = min(src1, src2)). Expands into a
// signed compare followed by the NDD conditional move on "greater".
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the rule as
// convertible to the two-operand form when dst and src1 share a register -
// confirm against the flag's definition.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    // Flags register local to the expansion, linking the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
17079
// Helper for MaxI without APX: has no match rule of its own and is only
// instantiated from maxI_rReg's expand block. Replaces $dst with $src when
// the preceding compare found "less".
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17091
// APX helper for MaxI: has no match rule of its own and is only instantiated
// from maxI_rReg_ndd's expand block. Uses the three-operand ecmovl, so $dst
// is a pure DEF and both sources are only read.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
17103
// MaxI without APX, two-operand form (dst = max(dst, src)). Expands into a
// signed compare followed by a conditional move on "less".
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    // Flags register local to the expansion, linking the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
17116
// MaxI with APX, three-operand form (dst = max(src1, src2)). Expands into a
// signed compare followed by the NDD conditional move on "less".
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks the rule as
// convertible to the two-operand form when dst and src1 share a register -
// confirm against the flag's definition.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    // Flags register local to the expansion, linking the compare to the cmov.
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
17131
17132 // ============================================================================
17133 // Branch Instructions
17134
17135 // Jump Direct - Label defines a relative address from JMP+1
// Matches Goto. The encoding is declared as exactly 5 bytes, which is why
// the long form of the jump is forced.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
17150
17151 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Matches If on signed flags (rFlagsReg). The encoding is declared as
// exactly 6 bytes, which is why the long form of the jcc is forced.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17166
17167 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Matches CountedLoopEnd (the back-branch of a counted loop) on signed
// flags. Same fixed 6-byte long-form jcc encoding as jmpCon.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17182
17183 // Jump Direct Conditional - using unsigned comparison
// Matches If on unsigned flags (rFlagsRegU) with an unsigned condition
// operand (cmpOpU). Fixed 6-byte long-form jcc encoding.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17197
// Conditional jump on rFlagsRegUCF flags with a cmpOpUCF condition, emitted
// as a single long-form jcc (fixed 6 bytes). Costed lower (200) than the
// plain signed/unsigned conditional jumps (300).
// NOTE(review): the UCF flag class presumably denotes flags produced by
// floating-point compares - confirm against the operand definitions.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17211
// Conditional jump on rFlagsRegUCF flags for the eq/ne conditions covered by
// cmpOpUCF2, where the parity flag must be consulted in addition to ZF.
// Emits two jumps and therefore declares no fixed size:
//   notEqual: branch to the target if parity is set OR the operands differ.
//   equal:    skip the branch if parity is set, else branch if equal.
// NOTE(review): parity set presumably signals an unordered (NaN) compare
// result - confirm against the compare rules that produce these flags.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      // Short jump over the following jcc when parity is set.
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       // Only equal and notEqual are handled by this rule.
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
17243
17244 // Jump Direct Conditional - using signed and unsigned comparison
// Matches If on rFlagsRegUCFE flags with a cmpOpUCFE condition, emitted as a
// single long-form jcc (fixed 6 bytes).
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17258
17259 // ============================================================================
17260 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17261 // superklass array for an instance of the superklass. Set a hidden
17262 // internal cache on a hit (cache is checked with exposed code in
17263 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17264 // encoding ALSO sets flags.
17265
// Linear-scan variant, selected when UseSecondarySupersTable is off.
// Operands are pinned to fixed registers (result=RDI, sub=RSI, super=RAX,
// scratch=RCX); RCX and the flags are killed.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
    "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Reached only when the helper did not branch to 'miss': zero the result
    // to report a hit. On the miss path $result keeps the helper's value.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
17299
17300 // ============================================================================
17301 // Two versions of hashtable-based partialSubtypeCheck, both used when
17302 // we need to search for a super class in the secondary supers array.
17303 // The first is used when we don't know _a priori_ the class being
17304 // searched for. The second, far more common, is used when we do know:
17305 // this is used for instanceof, checkcast, and any case where C2 can
17306 // determine it by constant propagation.
17307
// Table-based secondary-supers lookup for a super class held in a register.
// Only selected when UseSecondarySupersTable is on.  Four fixed temporaries
// (rdx, rcx, rbx, r11) are reserved and the flags are killed.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_cost(1000);

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register,
                                         $super$$Register,
                                         $temp1$$Register,
                                         $temp2$$Register,
                                         $temp3$$Register,
                                         $temp4$$Register,
                                         $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17326
// Table-based secondary-supers lookup for a super class that is also known
// as a constant (super_con).  The constant's hash slot is computed while
// emitting; the lookup is then either inlined or done by calling the
// per-slot stub, depending on InlineSecondarySupersTest.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700);  // lower than partialSubtypeCheckVarSuper (1000)

  ins_encode %{
    u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out-of-line: call the stub generated for this hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
17351
17352 // ============================================================================
17353 // Branch Instructions -- short offset versions
17354 //
17355 // These instructions are used to replace jumps of a long offset (the default
17356 // match) with jumps of a shorter offset. These instructions are all tagged
17357 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17358 // match rules in general matching. Instead, the ADLC generates a conversion
17359 // method in the MachNode which can be used to do in-place replacement of the
17360 // long variant with the shorter variant. The compiler will determine if a
17361 // branch can be taken by the is_short_branch_offset() predicate in the machine
17362 // specific code section of the file.
17363
// Jump Direct, short-offset replacement for the long Goto rule.
// Label defines a relative address from JMP+1; the encoding is 2 bytes.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "jmp,s $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
17379
// Jump Direct Conditional, short-offset variant.
// Label defines a relative address from Jcc+1; the encoding is 2 bytes.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17395
// Counted-loop back branch, short-offset variant.
// Label defines a relative address from Jcc+1; the encoding is 2 bytes.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17411
// Jump Direct Conditional - using unsigned comparison, short-offset variant
// (2-byte encoding).
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17427
// Conditional jump on rFlagsRegUCF flags with a cmpOpUCF condition,
// short-offset variant (2-byte encoding).
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17442
// Short-offset variant of the two-jump cmpOpUCF2 branch: a parity test
// followed by the equal/notEqual test, both as 2-byte jumps (4 bytes total).
// For notEqual a set parity flag also branches; for equal it skips the branch.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        // Parity set: jump over the equality branch.
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jccb(Assembler::equal, *target);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17476
// Jump Direct Conditional - using signed and unsigned comparison,
// short-offset variant (2-byte encoding).
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17492
17493 // ============================================================================
17494 // inlined locking and unlocking
17495
// Inlined lock fast path.  The outcome is produced in the flags (cr).
// box (rbx) is used and then killed; rax_reg and tmp are scratch registers.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);

  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $rax_reg$$Register,
                 $tmp$$Register,
                 r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17506
// Inlined unlock fast path.  The outcome is produced in the flags (cr).
// rax_reg is used and then killed; tmp is a scratch register.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);

  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    __ fast_unlock($object$$Register,
                   $rax_reg$$Register,
                   $tmp$$Register,
                   r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17517
17518
17519 // ============================================================================
17520 // Safepoint Instructions
// Safepoint poll: a testl against the address held in $poll, tagged with a
// poll_type relocation.  Only the flags are clobbered.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  ins_cost(125);
  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the emitted bytes can be verified.
    address poll_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17537
// MaskAll from a long source into an opmask register; the mask length is
// the vector length of this node.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, vlen);
  %}
  ins_pipe( pipe_slow );
%}
17547
// MaskAll from an int source for vectors longer than 32 elements: the int
// is first sign-extended into a long temporary, then used as the mask source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, vlen);
  %}
  ins_pipe( pipe_slow );
%}
17560
17561 // ============================================================================
17562 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Encoding order: clear_avx, then the static call itself, then call_epilog.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_alignment(4);
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  format %{ "call,static " %}
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
17577
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Encoding order: clear_avx, then the dynamic call itself, then call_epilog.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_alignment(4);
  ins_cost(300);
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
17593
// Call Runtime Instruction: clear_avx followed by the Java-to-runtime call.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17605
// Call runtime without safepoint (leaf call): clear_avx followed by the
// Java-to-runtime call.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17617
// Call runtime without safepoint and with vector arguments.
// Unlike the other runtime-call rules here, the encoding has no clear_avx step.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  format %{ "call_leaf,vector " %}
  ins_cost(300);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17629
// Call runtime without safepoint, indirect form.
// Selected when the call node has no entry point; the address to call is
// supplied in the target register instead.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  match(CallLeafNoFP target);
  predicate(n->as_Call()->entry_point() == nullptr);

  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_cost(300);
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17645
// Call runtime without safepoint, direct form.
// Selected when the call node carries a non-null entry point.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  predicate(n->as_Call()->entry_point() != nullptr);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17658
// Return Instruction
// Remove the return address & jump to it.
// NOTE(review): an earlier note here said a nop is always emitted after the
// ret to leave room for safepoint patching; this encoding emits only the
// ret itself - confirm whether that note still applies.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    __ ret(0);
  %}
%}
17673
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// The jump target will eventually return to the caller, so the return
// address is left in place (TailJump below is the variant that removes it).
// rbp must not hold 'jump_target': a MachEpilogNode has already been emitted
// just above the TailCall and has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17691
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is popped into rdx and discarded.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_cost(300);
  ins_encode %{
    Register discard = as_Register(RDX_enc);
    __ popq(discard);
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17707
// Forward exception: jump (not call) to the forward-exception stub entry.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
%}
17719
// Create exception oop: created by stack-crawling runtime code.
// The exception is set up just before control reaches this handler and is
// already available in rax, so this rule emits no code (size 0).
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  // Format is informational only; nothing is encoded.
  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
17733
// Rethrow exception:
// The exception oop will come in the first argument position.
// Control is transferred with a JUMP (not a call) to the rethrow stub.
instruct RethrowException()
%{
  match(Rethrow);

  format %{ "jmp rethrow_stub" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
%}
17748
17749 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// The thread pointer already lives in r15, so nothing is emitted (size 0).
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17762
// Float add, two-operand form used when UseAVX == 0: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  match(Set dst (AddF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17774
// Float add, two-operand form (UseAVX == 0) with the right operand loaded
// from memory.
instruct addF_mem(regF dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, $src" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17786
// Float add, two-operand form (UseAVX == 0) with a float constant read
// from the constant table.
instruct addF_imm(regF dst, immF con) %{
  match(Set dst (AddF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17797
// Float add, three-operand form used when UseAVX > 0: dst = src1 + src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17809
// Float add, three-operand form (UseAVX > 0) with the right operand loaded
// from memory.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17821
// Float add, three-operand form (UseAVX > 0) with a float constant read
// from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (AddF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17833
// Double add, two-operand form used when UseAVX == 0: dst = dst + src.
instruct addD_reg(regD dst, regD src) %{
  match(Set dst (AddD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17845
// Double add, two-operand form (UseAVX == 0) with the right operand loaded
// from memory.
instruct addD_mem(regD dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, $src" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17857
// Double add, two-operand form (UseAVX == 0) with a double constant read
// from the constant table.
instruct addD_imm(regD dst, immD con) %{
  match(Set dst (AddD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17868
// Double add, three-operand form used when UseAVX > 0: dst = src1 + src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17880
// Double add, three-operand form (UseAVX > 0) with the right operand loaded
// from memory.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (AddD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17892
// Double add, three-operand form (UseAVX > 0) with a double constant read
// from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (AddD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17904
// Float subtract, two-operand form used when UseAVX == 0: dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  match(Set dst (SubF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17916
// Float subtract, two-operand form (UseAVX == 0) with the subtrahend loaded
// from memory.
instruct subF_mem(regF dst, memory src) %{
  match(Set dst (SubF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, $src" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17928
// Float subtract, two-operand form (UseAVX == 0) with a float constant read
// from the constant table.
instruct subF_imm(regF dst, immF con) %{
  match(Set dst (SubF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17939
// Float subtract, three-operand form used when UseAVX > 0: dst = src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17951
// Float subtract, three-operand form (UseAVX > 0) with the subtrahend loaded
// from memory.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (SubF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17963
// Float subtract, three-operand form (UseAVX > 0) with a float constant read
// from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (SubF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17975
// Double subtract, two-operand form used when UseAVX == 0: dst = dst - src.
instruct subD_reg(regD dst, regD src) %{
  match(Set dst (SubD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17987
// Double subtract, two-operand form (UseAVX == 0) with the subtrahend loaded
// from memory.
instruct subD_mem(regD dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, $src" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17999
// Double subtract, two-operand form (UseAVX == 0) with a double constant
// read from the constant table.
instruct subD_imm(regD dst, immD con) %{
  match(Set dst (SubD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18010
// Double subtract, three-operand form used when UseAVX > 0: dst = src1 - src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18022
// Double subtract, three-operand form (UseAVX > 0) with the subtrahend
// loaded from memory.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (SubD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18034
// Double subtract, three-operand form (UseAVX > 0) with a double constant
// read from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (SubD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18046
// Float multiply, two-operand form used when UseAVX == 0: dst = dst * src.
instruct mulF_reg(regF dst, regF src) %{
  match(Set dst (MulF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18058
// Float multiply, two-operand form (UseAVX == 0) with the right operand
// loaded from memory.
instruct mulF_mem(regF dst, memory src) %{
  match(Set dst (MulF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, $src" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18070
// Float multiply, two-operand form (UseAVX == 0) with a float constant read
// from the constant table.
instruct mulF_imm(regF dst, immF con) %{
  match(Set dst (MulF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18081
// Float multiply, three-operand form used when UseAVX > 0: dst = src1 * src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18093
// Float multiply, three-operand form (UseAVX > 0) with the right operand
// loaded from memory.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18105
// Float multiply, three-operand form (UseAVX > 0) with a float constant read
// from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (MulF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18117
// Double multiply, two-operand form used when UseAVX == 0: dst = dst * src.
instruct mulD_reg(regD dst, regD src) %{
  match(Set dst (MulD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18129
// Double multiply, two-operand form (UseAVX == 0) with the right operand
// loaded from memory.
instruct mulD_mem(regD dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18141
// Double multiply, two-operand form (UseAVX == 0) with a double constant
// read from the constant table.
instruct mulD_imm(regD dst, immD con) %{
  match(Set dst (MulD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18152
// Double multiply, three-operand form used when UseAVX > 0: dst = src1 * src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18164
// Double multiply, three-operand form (UseAVX > 0) with the right operand
// loaded from memory.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  match(Set dst (MulD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18176
// Double multiply, three-operand form (UseAVX > 0) with a double constant
// read from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  match(Set dst (MulD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18188
// Float divide, two-operand form used when UseAVX == 0: dst = dst / src.
instruct divF_reg(regF dst, regF src) %{
  match(Set dst (DivF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18200
// Float divide, two-operand form (UseAVX == 0) with the divisor loaded from
// memory.
instruct divF_mem(regF dst, memory src) %{
  match(Set dst (DivF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, $src" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18212
// Float divide, two-operand form (UseAVX == 0) with a float constant divisor
// read from the constant table.
instruct divF_imm(regF dst, immF con) %{
  match(Set dst (DivF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18223
// Float divide, three-operand form used when UseAVX > 0: dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18235
// Float divide, three-operand form (UseAVX > 0) with the divisor loaded from
// memory.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  match(Set dst (DivF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18247
// Float divide, three-operand form (UseAVX > 0) with a float constant divisor
// read from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  match(Set dst (DivF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18259
// Double divide, two-operand form used when UseAVX == 0: dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  match(Set dst (DivD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18271
// Double divide, two-operand form (UseAVX == 0) with the divisor loaded from
// memory.
instruct divD_mem(regD dst, memory src) %{
  match(Set dst (DivD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, $src" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18283
// Double divide, two-operand form (UseAVX == 0) with a double constant
// divisor read from the constant table.
instruct divD_imm(regD dst, immD con) %{
  match(Set dst (DivD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18294
// Double-precision divide for AVX targets (UseAVX > 0): dst = src1 / src2,
// with all three operands held in XMM registers.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivsd(quotient, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
18306
// Double-precision divide for AVX targets (UseAVX > 0) where the divisor is
// loaded straight from memory: dst = src1 / [src2].
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivsd(quotient, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18318
// Double-precision divide for AVX targets (UseAVX > 0) by a double constant
// that is read from the constant table: dst = src / con.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));
  ins_cost(150);

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivsd(quotient, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18330
// Float absolute value for non-AVX targets (UseAVX == 0): ANDs dst in place
// with the constant returned by float_signmask().
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andps(value, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18341
// Float absolute value for AVX targets (UseAVX > 0): dst = src AND the
// constant returned by float_signmask(), encoded as a 128-bit vandps.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandps(result, input, ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18354
// Double absolute value for non-AVX targets (UseAVX == 0): ANDs dst in place
// with the constant returned by double_signmask().
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andpd(value, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18366
// Double absolute value for AVX targets (UseAVX > 0): dst = src AND the
// constant returned by double_signmask(), encoded as a 128-bit vandpd.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandpd(result, input, ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18380
// Float negation for non-AVX targets (UseAVX == 0): XORs dst in place with
// the constant returned by float_signflip().
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorps(value, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18391
// Float negation for AVX targets (UseAVX > 0): delegates to the vnegatess
// macro-assembler helper with the float_signflip() constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18403
// Double negation for non-AVX targets (UseAVX == 0): XORs dst in place with
// the constant returned by double_signflip().
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorpd(value, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18415
// Double negation for AVX targets (UseAVX > 0): delegates to the vnegatesd
// macro-assembler helper with the double_signflip() constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18428
// For best performance sqrtss wants its destination register to be
// initialized before it executes. For that reason the only rule provided is
// the one where the input already lives in dst.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    // Source and destination are the same register.
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtss(value, value);
  %}
  ins_pipe(pipe_slow);
%}
18439
// For best performance sqrtsd wants its destination register to be
// initialized before it executes. For that reason the only rule provided is
// the one where the input already lives in dst.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    // Source and destination are the same register.
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtsd(value, value);
  %}
  ins_pipe(pipe_slow);
%}
18450
// ConvF2HF with the result in a general-purpose register. The conversion is
// delegated to the flt_to_flt16 macro-assembler helper, which is handed an
// XMM temporary.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    half_bits = $dst$$Register;
    XMMRegister input     = $src$$XMMRegister;
    XMMRegister scratch   = $tmp$$XMMRegister;
    __ flt_to_flt16(half_bits, input, scratch);
  %}
  ins_pipe(pipe_slow);
%}
18461
// ConvF2HF fused with the StoreC of its result (AVX-512 with VL only).
// An opmask with only bit 0 set is built through a GPR temporary and passed
// to the masked evcvtps2ph store.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  mask_bits = $rtmp$$Register;
    KRegister lane_mask = $ktmp$$KRegister;
    // lane_mask = 0x1
    __ movl(mask_bits, 0x1);
    __ kmovwl(lane_mask, mask_bits);
    __ evcvtps2ph($mem$$Address, lane_mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18474
// VectorCastF2HF, register to register. The vector length encoding is taken
// from the source operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    const int src_len_enc = vector_length_encoding(this, $src);
    XMMRegister halves = $dst$$XMMRegister;
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph(halves, floats, 0x04, src_len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18484
// VectorCastF2HF fused with the StoreVector of its result. Only selected when
// the store covers at least 16 bytes; the vector length encoding is taken
// from the source operand.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    const int src_len_enc = vector_length_encoding(this, $src);
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, floats, 0x04, src_len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18495
// ConvHF2F with the input in a general-purpose register. The conversion is
// delegated to the flt16_to_flt macro-assembler helper.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister result    = $dst$$XMMRegister;
    Register    half_bits = $src$$Register;
    __ flt16_to_flt(result, half_bits);
  %}
  ins_pipe(pipe_slow);
%}
18504
// VectorCastHF2F fused with the LoadVector that feeds it. The vector length
// encoding is taken from this (result) node.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    const int dst_len_enc = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    __ vcvtph2ps(floats, $mem$$Address, dst_len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18514
// VectorCastHF2F, register to register. The vector length encoding is taken
// from this (result) node.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    const int dst_len_enc = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    XMMRegister halves = $src$$XMMRegister;
    __ vcvtph2ps(floats, halves, dst_len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18525
18526 // ---------------------------------------- VectorReinterpret ------------------------------------
// VectorReinterpret of a predicate-register mask whose lane count is the same
// before and after. Source and destination are the same register, so no code
// is emitted.
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  match(Set dst (VectorReinterpret dst));
  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(pipe_slow);
%}
18538
// VectorReinterpret of a T_SHORT predicate mask as a T_BYTE predicate mask
// with a different lane count (AVX-512 only). The mask is expanded into a
// vector temporary with evpmovm2w and converted back with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  match(Set dst (VectorReinterpret src));
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int len_enc = vector_length_encoding(src_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2w(expanded, $src$$KRegister, len_enc);
    __ evpmovb2m($dst$$KRegister, expanded, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18558
// VectorReinterpret of a T_INT or T_FLOAT predicate mask as a T_BYTE predicate
// mask with a different lane count (AVX-512 only). The mask is expanded into a
// vector temporary with evpmovm2d and converted back with evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  match(Set dst (VectorReinterpret src));
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int len_enc = vector_length_encoding(src_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2d(expanded, $src$$KRegister, len_enc);
    __ evpmovb2m($dst$$KRegister, expanded, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18579
// VectorReinterpret of a T_LONG or T_DOUBLE predicate mask as a T_BYTE
// predicate mask with a different lane count (AVX-512 only). The mask is
// expanded into a vector temporary with evpmovm2q and converted back with
// evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  effect(TEMP xtmp);
  match(Set dst (VectorReinterpret src));
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Both views must cover the same number of bytes.
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int len_enc = vector_length_encoding(src_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2q(expanded, $src$$KRegister, len_enc);
    __ evpmovb2m($dst$$KRegister, expanded, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18600
// VectorReinterpret of a non-mask vector whose size in bytes is unchanged.
// Source and destination are the same register, so no code is emitted.
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret dst));
  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(pipe_slow);
%}
18612
// VectorReinterpret to a larger vector on non-AVX targets (UseAVX == 0).
// dst is first loaded with a 32-bit or 64-bit mask constant (chosen by the
// source size) and then ANDed with src. dst is a TEMP because it is written
// before src is read.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));
  effect(TEMP dst);
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(src_bytes <= 8, "required");

    XMMRegister result = $dst$$XMMRegister;
    if (src_bytes != 4) {
      assert(src_bytes == 8, "");
      __ movdqu(result, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(result, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(result, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18635
// VectorReinterpret of a 4-byte non-mask vector to a larger vector on AVX
// targets (UseAVX > 0): dst = src AND the vector_32_bit_mask() constant.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vpand(result, input, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18649
18650
// VectorReinterpret of a non-mask vector larger than 4 bytes to a still larger
// vector on AVX targets (UseAVX > 0). The move instruction is selected by the
// source size in bytes (8, 16 or 32).
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n)));
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    const int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (src_bytes == 8) {
      __ movq(result, input);
    } else if (src_bytes == 16) {
      __ movdqu(result, input);
    } else if (src_bytes == 32) {
      __ vmovdqu(result, input);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18669
// VectorReinterpret of a non-mask vector to a smaller vector. The move
// instruction is selected by the destination size in bytes (4, 8, 16 or 32).
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n));
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (dst_bytes == 4) {
      __ movfltz(result, input);
    } else if (dst_bytes == 8) {
      __ movq(result, input);
    } else if (dst_bytes == 16) {
      __ movdqu(result, input);
    } else if (dst_bytes == 32) {
      __ vmovdqu(result, input);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18687
18688 // ----------------------------------------------------------------------------------------------------
18689
// RoundDoubleMode on a register input, using roundsd with the rounding mode
// given as an immediate. Requires UseSSE >= 4 (asserted).
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    // Without AVX, zero a destination that differs from the source first.
    // NOTE(review): presumably this breaks a dependency on the old dst value - confirm.
    if ((UseAVX == 0) && (result != input)) {
      __ pxor(result, result);
    }
    __ roundsd(result, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18703
// RoundDoubleMode on a double constant read from the constant table, using
// roundsd with the rounding mode given as an immediate. Requires
// UseSSE >= 4 (asserted).
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ roundsd(result, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18714
// RoundDoubleModeV for vectors of fewer than 8 lanes, register input, using
// vroundpd. Requires UseAVX > 0 (asserted).
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int len_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vroundpd(result, input, $rmode$$constant, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18726
// RoundDoubleModeV for 8-lane vectors, register input, using a 512-bit
// vrndscalepd. Requires UseAVX > 2 (asserted).
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vrndscalepd(result, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe(pipe_slow);
%}
18737
// RoundDoubleModeV for vectors of fewer than 8 lanes with the input loaded
// from memory, using vroundpd. Requires UseAVX > 0 (asserted).
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int len_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vroundpd(result, $mem$$Address, $rmode$$constant, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18749
// RoundDoubleModeV for 8-lane vectors with the input loaded from memory,
// using a 512-bit vrndscalepd. Requires UseAVX > 2 (asserted).
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ vrndscalepd(result, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe(pipe_slow);
%}
18760
// OnSpinWait: emits a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_cost(200);
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18774
// Double fused multiply-add: c = a * b + c. The addend register c also
// receives the result. Requires UseFMA (asserted).
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accumulator = $c$$XMMRegister;
    __ fmad(accumulator, $a$$XMMRegister, $b$$XMMRegister, accumulator);
  %}
  ins_pipe(pipe_slow);
%}
18786
// Float fused multiply-add: c = a * b + c. The addend register c also
// receives the result. Requires UseFMA (asserted).
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accumulator = $c$$XMMRegister;
    __ fmaf(accumulator, $a$$XMMRegister, $b$$XMMRegister, accumulator);
  %}
  ins_pipe(pipe_slow);
%}
18798
18799 // ====================VECTOR INSTRUCTIONS=====================================
18800
18801 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from the vec operand class to the legVec operand class.
// It has an empty format and its encoding must never be reached.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  ins_pipe(fpu_reg_reg);
  format %{ "" %}
  ins_encode %{
    // This rule is never expected to be emitted.
    ShouldNotReachHere();
  %}
%}
18810
// Placeholder move from the legVec operand class to the vec operand class.
// It has an empty format and its encoding must never be reached.
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  ins_pipe(fpu_reg_reg);
  format %{ "" %}
  ins_encode %{
    // This rule is never expected to be emitted.
    ShouldNotReachHere();
  %}
%}
18819
18820 // ============================================================================
18821
// Generic vector load: one rule for every LoadVector. The element type and
// the size in bytes of the result are forwarded to the load_vector helper.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    XMMRegister loaded = $dst$$XMMRegister;
    __ load_vector(elem_type, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe(pipe_slow);
%}
18833
// Generic vector store: one rule for every StoreVector. The store instruction
// is selected by the size in bytes of the source vector (4, 8, 16, 32 or 64).
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  format %{ "store_vector $mem,$src\n\t" %}
  ins_cost(145);
  ins_encode %{
    const int store_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister value = $src$$XMMRegister;
    if (store_bytes == 4) {
      __ movdl($mem$$Address, value);
    } else if (store_bytes == 8) {
      __ movq($mem$$Address, value);
    } else if (store_bytes == 16) {
      __ movdqu($mem$$Address, value);
    } else if (store_bytes == 32) {
      __ vmovdqu($mem$$Address, value);
    } else if (store_bytes == 64) {
      __ evmovdqul($mem$$Address, value, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18851
18852 // ---------------------------------------- Gather ------------------------------------
18853
18854 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18855
// Unmasked gather of non-subword elements for vectors of at most 32 bytes
// when AVX512VL is not available. A vector temporary is filled by comparing
// it with itself (vpcmpeqd) and passed to vgather as the mask; the base
// address is materialized into a pointer temporary.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  effect(TEMP dst, TEMP tmp, TEMP mask);
  match(Set dst (LoadVectorGather mem idx));
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    XMMRegister lane_mask = $mask$$XMMRegister;
    Register    base      = $tmp$$Register;
    __ vpcmpeqd(lane_mask, lane_mask, lane_mask, len_enc);
    __ lea(base, $mem$$Address);
    __ vgather(bt, $dst$$XMMRegister, base, $idx$$XMMRegister, lane_mask, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18872
18873
// Unmasked gather of non-subword elements using the opmask-predicated
// evgather helper. Selected when AVX512VL is available or the vector is
// 64 bytes long.
//   dst  - gathered vector (TEMP: written while inputs are still live)
//   mem  - base address of the data
//   idx  - vector of indices
//   tmp  - pointer temporary that receives the materialized base address
//   ktmp - opmask temporary, initialized to all ones via kxnorwl
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // The opmask temporary is printed through its operand ($ktmp) so the debug
  // listing shows the register that was actually allocated.
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // ktmp XNOR ktmp: sets the low 16 mask bits.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18889
// Masked gather of non-subword elements using the opmask-predicated evgather
// helper. Selected when AVX512VL is available or the vector is 64 bytes long.
//   dst  - gathered vector (TEMP_DEF); zeroed before the gather
//   mem  - base address of the data
//   idx  - vector of indices
//   mask - opmask selecting the lanes to gather
//   ktmp - opmask temporary that receives a copy of mask
//   tmp  - pointer temporary that receives the materialized base address
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // The opmask temporary is printed through its operand ($ktmp) so the debug
  // listing shows the register that was actually allocated.
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero the destination before the gather.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18910
// Unmasked gather of subword elements for vectors of at most 8 bytes.
// The base address is materialized into a pointer temporary and the work is
// delegated to the vgather8b helper.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP tmp, TEMP rtmp);
  match(Set dst (LoadVectorGather mem idx_base));
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register base = $tmp$$Register;
    __ lea(base, $mem$$Address);
    __ vgather8b(bt, $dst$$XMMRegister, base, $idx_base$$Register, $rtmp$$Register, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18924
// Unmasked gather of subword elements for vectors larger than 8 bytes.
// The index base is copied into a temporary before being handed to the
// vgather_subword helper; noreg is passed for the helper's mask and
// mask-index arguments.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  match(Set dst (LoadVectorGather mem idx_base));
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    const int len_enc    = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register base       = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(idx_cursor, $idx_base$$Register);
    __ vgather_subword(bt, $dst$$XMMRegister, base, idx_cursor, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18942
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512BW is available (the mask is an opmask register). The mask index is
// zeroed, the opmask is copied into a general-purpose register, and the work
// is delegated to the vgather8b_masked helper.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register bit_index = $mask_idx$$Register;
    Register base      = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(bit_index, bit_index);
    __ lea(base, $mem$$Address);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather8b_masked(bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, bit_index, $rtmp$$Register, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18958
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available (the mask is an opmask register). The mask index is
// zeroed, the index base and the opmask are copied into general-purpose
// temporaries, and the work is delegated to the vgather_subword helper.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const int len_enc    = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register bit_index  = $mask_idx$$Register;
    Register base       = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    Register mask_bits  = $rtmp2$$Register;
    __ xorq(bit_index, bit_index);
    __ lea(base, $mem$$Address);
    __ movptr(idx_cursor, $idx_base$$Register);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather_subword(bt, $dst$$XMMRegister, base, idx_cursor, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, bit_index, $length$$Register, lane_count, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18978
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available (the mask is a vector register).
// vpmovmskb extracts the mask bits into a general-purpose register; for
// T_SHORT, pextl with 0x55555555 then keeps only the even-numbered bits.
// The work is delegated to the vgather8b_masked helper.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register base      = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    Register bit_index = $mask_idx$$Register;
    __ lea(base, $mem$$Address);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, len_enc);
    if (bt == T_SHORT) {
      // bit_index doubles as scratch for the pext selector before it is zeroed.
      __ movl(bit_index, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_index);
    }
    __ xorl(bit_index, bit_index);
    __ vgather8b_masked(bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, bit_index, $rtmp$$Register, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
18998
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available (the mask is a vector register).
// vpmovmskb extracts the mask bits into a general-purpose register; for
// T_SHORT, pextl with 0x55555555 then keeps only the even-numbered bits.
// The work is delegated to the vgather_subword helper.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    const int len_enc    = vector_length_encoding(this);
    const int lane_count = Matcher::vector_length(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    Register base       = $tmp$$Register;
    Register idx_cursor = $idx_base_temp$$Register;
    Register mask_bits  = $rtmp2$$Register;
    Register bit_index  = $mask_idx$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(idx_cursor, $idx_base$$Register);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, len_enc);
    if (bt == T_SHORT) {
      // bit_index doubles as scratch for the pext selector before it is zeroed.
      __ movl(bit_index, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_index);
    }
    __ xorl(bit_index, bit_index);
    __ vgather_subword(bt, $dst$$XMMRegister, base, idx_cursor, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, bit_index, $length$$Register, lane_count, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
19022
19023 // ====================Scatter=======================================
19024
19025 // Scatter INT, LONG, FLOAT, DOUBLE
19026
// Unmasked scatter of non-subword elements (AVX-512 only, UseAVX > 2).
//   mem  - base address of the destination
//   src  - vector of values to store (at least 16 bytes, asserted)
//   idx  - vector of indices
//   tmp  - pointer temporary that receives the materialized base address
//   ktmp - opmask temporary, loaded from the vector_all_bits_set() constant
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // The opmask temporary is printed through its operand ($ktmp): the encoding
  // uses the allocated register, not a fixed k2.
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    // Load the low 16 mask bits from the all-bits-set constant.
    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19045
19046 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
19047 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
19048 effect(TEMP tmp, TEMP ktmp);
19049 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
19050 ins_encode %{
19051 int vlen_enc = vector_length_encoding(this, $src);
19052 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
19053 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
19054 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
19055 // Note: Since scatter instruction partially updates the opmask register used
19056 // for predication hense moving mask operand to a temporary.
19057 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
19058 __ lea($tmp$$Register, $mem$$Address);
19059 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
19060 %}
19061 ins_pipe( pipe_slow );
19062 %}
19063
19064 // ====================REPLICATE=======================================
19065
19066 // Replicate byte scalar to be vector
19067 instruct vReplB_reg(vec dst, rRegI src) %{
19068 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
19069 match(Set dst (Replicate src));
19070 format %{ "replicateB $dst,$src" %}
19071 ins_encode %{
19072 uint vlen = Matcher::vector_length(this);
19073 if (UseAVX >= 2) {
19074 int vlen_enc = vector_length_encoding(this);
19075 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19076 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
19077 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
19078 } else {
19079 __ movdl($dst$$XMMRegister, $src$$Register);
19080 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19081 }
19082 } else {
19083 assert(UseAVX < 2, "");
19084 __ movdl($dst$$XMMRegister, $src$$Register);
19085 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
19086 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19087 if (vlen >= 16) {
19088 assert(vlen == 16, "");
19089 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19090 }
19091 }
19092 %}
19093 ins_pipe( pipe_slow );
19094 %}
19095
19096 instruct ReplB_mem(vec dst, memory mem) %{
19097 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
19098 match(Set dst (Replicate (LoadB mem)));
19099 format %{ "replicateB $dst,$mem" %}
19100 ins_encode %{
19101 int vlen_enc = vector_length_encoding(this);
19102 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
19103 %}
19104 ins_pipe( pipe_slow );
19105 %}
19106
19107 // ====================ReplicateS=======================================
19108
19109 instruct vReplS_reg(vec dst, rRegI src) %{
19110 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
19111 match(Set dst (Replicate src));
19112 format %{ "replicateS $dst,$src" %}
19113 ins_encode %{
19114 uint vlen = Matcher::vector_length(this);
19115 int vlen_enc = vector_length_encoding(this);
19116 if (UseAVX >= 2) {
19117 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
19118 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
19119 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
19120 } else {
19121 __ movdl($dst$$XMMRegister, $src$$Register);
19122 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19123 }
19124 } else {
19125 assert(UseAVX < 2, "");
19126 __ movdl($dst$$XMMRegister, $src$$Register);
19127 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19128 if (vlen >= 8) {
19129 assert(vlen == 8, "");
19130 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19131 }
19132 }
19133 %}
19134 ins_pipe( pipe_slow );
19135 %}
19136
19137 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
19138 match(Set dst (Replicate con));
19139 effect(TEMP rtmp);
19140 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
19141 ins_encode %{
19142 int vlen_enc = vector_length_encoding(this);
19143 BasicType bt = Matcher::vector_element_basic_type(this);
19144 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
19145 __ movl($rtmp$$Register, $con$$constant);
19146 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19147 %}
19148 ins_pipe( pipe_slow );
19149 %}
19150
19151 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
19152 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
19153 match(Set dst (Replicate src));
19154 effect(TEMP rtmp);
19155 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
19156 ins_encode %{
19157 int vlen_enc = vector_length_encoding(this);
19158 __ evmovw($rtmp$$Register, $src$$XMMRegister);
19159 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
19160 %}
19161 ins_pipe( pipe_slow );
19162 %}
19163
19164 instruct ReplS_mem(vec dst, memory mem) %{
19165 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
19166 match(Set dst (Replicate (LoadS mem)));
19167 format %{ "replicateS $dst,$mem" %}
19168 ins_encode %{
19169 int vlen_enc = vector_length_encoding(this);
19170 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
19171 %}
19172 ins_pipe( pipe_slow );
19173 %}
19174
19175 // ====================ReplicateI=======================================
19176
19177 instruct ReplI_reg(vec dst, rRegI src) %{
19178 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19179 match(Set dst (Replicate src));
19180 format %{ "replicateI $dst,$src" %}
19181 ins_encode %{
19182 uint vlen = Matcher::vector_length(this);
19183 int vlen_enc = vector_length_encoding(this);
19184 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19185 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
19186 } else if (VM_Version::supports_avx2()) {
19187 __ movdl($dst$$XMMRegister, $src$$Register);
19188 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19189 } else {
19190 __ movdl($dst$$XMMRegister, $src$$Register);
19191 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19192 }
19193 %}
19194 ins_pipe( pipe_slow );
19195 %}
19196
19197 instruct ReplI_mem(vec dst, memory mem) %{
19198 predicate(Matcher::vector_element_basic_type(n) == T_INT);
19199 match(Set dst (Replicate (LoadI mem)));
19200 format %{ "replicateI $dst,$mem" %}
19201 ins_encode %{
19202 int vlen_enc = vector_length_encoding(this);
19203 if (VM_Version::supports_avx2()) {
19204 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19205 } else if (VM_Version::supports_avx()) {
19206 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19207 } else {
19208 __ movdl($dst$$XMMRegister, $mem$$Address);
19209 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
19210 }
19211 %}
19212 ins_pipe( pipe_slow );
19213 %}
19214
19215 instruct ReplI_imm(vec dst, immI con) %{
19216 predicate(Matcher::is_non_long_integral_vector(n));
19217 match(Set dst (Replicate con));
19218 format %{ "replicateI $dst,$con" %}
19219 ins_encode %{
19220 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19221 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19222 type2aelembytes(Matcher::vector_element_basic_type(this))));
19223 BasicType bt = Matcher::vector_element_basic_type(this);
19224 int vlen = Matcher::vector_length_in_bytes(this);
19225 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19226 %}
19227 ins_pipe( pipe_slow );
19228 %}
19229
19230 // Replicate scalar zero to be vector
19231 instruct ReplI_zero(vec dst, immI_0 zero) %{
19232 predicate(Matcher::is_non_long_integral_vector(n));
19233 match(Set dst (Replicate zero));
19234 format %{ "replicateI $dst,$zero" %}
19235 ins_encode %{
19236 int vlen_enc = vector_length_encoding(this);
19237 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19238 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19239 } else {
19240 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19241 }
19242 %}
19243 ins_pipe( fpu_reg_reg );
19244 %}
19245
19246 instruct ReplI_M1(vec dst, immI_M1 con) %{
19247 predicate(Matcher::is_non_long_integral_vector(n));
19248 match(Set dst (Replicate con));
19249 format %{ "vallones $dst" %}
19250 ins_encode %{
19251 int vector_len = vector_length_encoding(this);
19252 __ vallones($dst$$XMMRegister, vector_len);
19253 %}
19254 ins_pipe( pipe_slow );
19255 %}
19256
19257 // ====================ReplicateL=======================================
19258
19259 // Replicate long (8 byte) scalar to be vector
19260 instruct ReplL_reg(vec dst, rRegL src) %{
19261 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19262 match(Set dst (Replicate src));
19263 format %{ "replicateL $dst,$src" %}
19264 ins_encode %{
19265 int vlen = Matcher::vector_length(this);
19266 int vlen_enc = vector_length_encoding(this);
19267 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
19268 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
19269 } else if (VM_Version::supports_avx2()) {
19270 __ movdq($dst$$XMMRegister, $src$$Register);
19271 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19272 } else {
19273 __ movdq($dst$$XMMRegister, $src$$Register);
19274 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19275 }
19276 %}
19277 ins_pipe( pipe_slow );
19278 %}
19279
19280 instruct ReplL_mem(vec dst, memory mem) %{
19281 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19282 match(Set dst (Replicate (LoadL mem)));
19283 format %{ "replicateL $dst,$mem" %}
19284 ins_encode %{
19285 int vlen_enc = vector_length_encoding(this);
19286 if (VM_Version::supports_avx2()) {
19287 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
19288 } else if (VM_Version::supports_sse3()) {
19289 __ movddup($dst$$XMMRegister, $mem$$Address);
19290 } else {
19291 __ movq($dst$$XMMRegister, $mem$$Address);
19292 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
19293 }
19294 %}
19295 ins_pipe( pipe_slow );
19296 %}
19297
19298 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
19299 instruct ReplL_imm(vec dst, immL con) %{
19300 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19301 match(Set dst (Replicate con));
19302 format %{ "replicateL $dst,$con" %}
19303 ins_encode %{
19304 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19305 int vlen = Matcher::vector_length_in_bytes(this);
19306 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
19307 %}
19308 ins_pipe( pipe_slow );
19309 %}
19310
19311 instruct ReplL_zero(vec dst, immL0 zero) %{
19312 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19313 match(Set dst (Replicate zero));
19314 format %{ "replicateL $dst,$zero" %}
19315 ins_encode %{
19316 int vlen_enc = vector_length_encoding(this);
19317 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19318 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19319 } else {
19320 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19321 }
19322 %}
19323 ins_pipe( fpu_reg_reg );
19324 %}
19325
19326 instruct ReplL_M1(vec dst, immL_M1 con) %{
19327 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19328 match(Set dst (Replicate con));
19329 format %{ "vallones $dst" %}
19330 ins_encode %{
19331 int vector_len = vector_length_encoding(this);
19332 __ vallones($dst$$XMMRegister, vector_len);
19333 %}
19334 ins_pipe( pipe_slow );
19335 %}
19336
19337 // ====================ReplicateF=======================================
19338
19339 instruct vReplF_reg(vec dst, vlRegF src) %{
19340 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19341 match(Set dst (Replicate src));
19342 format %{ "replicateF $dst,$src" %}
19343 ins_encode %{
19344 uint vlen = Matcher::vector_length(this);
19345 int vlen_enc = vector_length_encoding(this);
19346 if (vlen <= 4) {
19347 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19348 } else if (VM_Version::supports_avx2()) {
19349 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19350 } else {
19351 assert(vlen == 8, "sanity");
19352 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19353 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19354 }
19355 %}
19356 ins_pipe( pipe_slow );
19357 %}
19358
19359 instruct ReplF_reg(vec dst, vlRegF src) %{
19360 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19361 match(Set dst (Replicate src));
19362 format %{ "replicateF $dst,$src" %}
19363 ins_encode %{
19364 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19365 %}
19366 ins_pipe( pipe_slow );
19367 %}
19368
19369 instruct ReplF_mem(vec dst, memory mem) %{
19370 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19371 match(Set dst (Replicate (LoadF mem)));
19372 format %{ "replicateF $dst,$mem" %}
19373 ins_encode %{
19374 int vlen_enc = vector_length_encoding(this);
19375 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19376 %}
19377 ins_pipe( pipe_slow );
19378 %}
19379
19380 // Replicate float scalar immediate to be vector by loading from const table.
19381 instruct ReplF_imm(vec dst, immF con) %{
19382 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19383 match(Set dst (Replicate con));
19384 format %{ "replicateF $dst,$con" %}
19385 ins_encode %{
19386 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19387 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19388 int vlen = Matcher::vector_length_in_bytes(this);
19389 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19390 %}
19391 ins_pipe( pipe_slow );
19392 %}
19393
19394 instruct ReplF_zero(vec dst, immF0 zero) %{
19395 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19396 match(Set dst (Replicate zero));
19397 format %{ "replicateF $dst,$zero" %}
19398 ins_encode %{
19399 int vlen_enc = vector_length_encoding(this);
19400 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19401 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19402 } else {
19403 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19404 }
19405 %}
19406 ins_pipe( fpu_reg_reg );
19407 %}
19408
19409 // ====================ReplicateD=======================================
19410
19411 // Replicate double (8 bytes) scalar to be vector
19412 instruct vReplD_reg(vec dst, vlRegD src) %{
19413 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19414 match(Set dst (Replicate src));
19415 format %{ "replicateD $dst,$src" %}
19416 ins_encode %{
19417 uint vlen = Matcher::vector_length(this);
19418 int vlen_enc = vector_length_encoding(this);
19419 if (vlen <= 2) {
19420 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19421 } else if (VM_Version::supports_avx2()) {
19422 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19423 } else {
19424 assert(vlen == 4, "sanity");
19425 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19426 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19427 }
19428 %}
19429 ins_pipe( pipe_slow );
19430 %}
19431
19432 instruct ReplD_reg(vec dst, vlRegD src) %{
19433 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19434 match(Set dst (Replicate src));
19435 format %{ "replicateD $dst,$src" %}
19436 ins_encode %{
19437 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19438 %}
19439 ins_pipe( pipe_slow );
19440 %}
19441
19442 instruct ReplD_mem(vec dst, memory mem) %{
19443 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19444 match(Set dst (Replicate (LoadD mem)));
19445 format %{ "replicateD $dst,$mem" %}
19446 ins_encode %{
19447 if (Matcher::vector_length(this) >= 4) {
19448 int vlen_enc = vector_length_encoding(this);
19449 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19450 } else {
19451 __ movddup($dst$$XMMRegister, $mem$$Address);
19452 }
19453 %}
19454 ins_pipe( pipe_slow );
19455 %}
19456
19457 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19458 instruct ReplD_imm(vec dst, immD con) %{
19459 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19460 match(Set dst (Replicate con));
19461 format %{ "replicateD $dst,$con" %}
19462 ins_encode %{
19463 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19464 int vlen = Matcher::vector_length_in_bytes(this);
19465 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19466 %}
19467 ins_pipe( pipe_slow );
19468 %}
19469
19470 instruct ReplD_zero(vec dst, immD0 zero) %{
19471 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19472 match(Set dst (Replicate zero));
19473 format %{ "replicateD $dst,$zero" %}
19474 ins_encode %{
19475 int vlen_enc = vector_length_encoding(this);
19476 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19477 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19478 } else {
19479 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19480 }
19481 %}
19482 ins_pipe( fpu_reg_reg );
19483 %}
19484
19485 // ====================VECTOR INSERT=======================================
19486
19487 instruct insert(vec dst, rRegI val, immU8 idx) %{
19488 predicate(Matcher::vector_length_in_bytes(n) < 32);
19489 match(Set dst (VectorInsert (Binary dst val) idx));
19490 format %{ "vector_insert $dst,$val,$idx" %}
19491 ins_encode %{
19492 assert(UseSSE >= 4, "required");
19493 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19494
19495 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19496
19497 assert(is_integral_type(elem_bt), "");
19498 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19499
19500 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19501 %}
19502 ins_pipe( pipe_slow );
19503 %}
19504
19505 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19506 predicate(Matcher::vector_length_in_bytes(n) == 32);
19507 match(Set dst (VectorInsert (Binary src val) idx));
19508 effect(TEMP vtmp);
19509 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19510 ins_encode %{
19511 int vlen_enc = Assembler::AVX_256bit;
19512 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19513 int elem_per_lane = 16/type2aelembytes(elem_bt);
19514 int log2epr = log2(elem_per_lane);
19515
19516 assert(is_integral_type(elem_bt), "sanity");
19517 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19518
19519 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19520 uint y_idx = ($idx$$constant >> log2epr) & 1;
19521 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19522 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19523 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19524 %}
19525 ins_pipe( pipe_slow );
19526 %}
19527
19528 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19529 predicate(Matcher::vector_length_in_bytes(n) == 64);
19530 match(Set dst (VectorInsert (Binary src val) idx));
19531 effect(TEMP vtmp);
19532 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19533 ins_encode %{
19534 assert(UseAVX > 2, "sanity");
19535
19536 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19537 int elem_per_lane = 16/type2aelembytes(elem_bt);
19538 int log2epr = log2(elem_per_lane);
19539
19540 assert(is_integral_type(elem_bt), "");
19541 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19542
19543 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19544 uint y_idx = ($idx$$constant >> log2epr) & 3;
19545 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19546 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19547 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19548 %}
19549 ins_pipe( pipe_slow );
19550 %}
19551
19552 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19553 predicate(Matcher::vector_length(n) == 2);
19554 match(Set dst (VectorInsert (Binary dst val) idx));
19555 format %{ "vector_insert $dst,$val,$idx" %}
19556 ins_encode %{
19557 assert(UseSSE >= 4, "required");
19558 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19559 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19560
19561 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19562 %}
19563 ins_pipe( pipe_slow );
19564 %}
19565
19566 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19567 predicate(Matcher::vector_length(n) == 4);
19568 match(Set dst (VectorInsert (Binary src val) idx));
19569 effect(TEMP vtmp);
19570 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19571 ins_encode %{
19572 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19573 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19574
19575 uint x_idx = $idx$$constant & right_n_bits(1);
19576 uint y_idx = ($idx$$constant >> 1) & 1;
19577 int vlen_enc = Assembler::AVX_256bit;
19578 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19579 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19580 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19581 %}
19582 ins_pipe( pipe_slow );
19583 %}
19584
19585 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19586 predicate(Matcher::vector_length(n) == 8);
19587 match(Set dst (VectorInsert (Binary src val) idx));
19588 effect(TEMP vtmp);
19589 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19590 ins_encode %{
19591 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19592 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19593
19594 uint x_idx = $idx$$constant & right_n_bits(1);
19595 uint y_idx = ($idx$$constant >> 1) & 3;
19596 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19597 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19598 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19599 %}
19600 ins_pipe( pipe_slow );
19601 %}
19602
19603 instruct insertF(vec dst, regF val, immU8 idx) %{
19604 predicate(Matcher::vector_length(n) < 8);
19605 match(Set dst (VectorInsert (Binary dst val) idx));
19606 format %{ "vector_insert $dst,$val,$idx" %}
19607 ins_encode %{
19608 assert(UseSSE >= 4, "sanity");
19609
19610 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19611 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19612
19613 uint x_idx = $idx$$constant & right_n_bits(2);
19614 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19615 %}
19616 ins_pipe( pipe_slow );
19617 %}
19618
19619 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19620 predicate(Matcher::vector_length(n) >= 8);
19621 match(Set dst (VectorInsert (Binary src val) idx));
19622 effect(TEMP vtmp);
19623 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19624 ins_encode %{
19625 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19626 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19627
19628 int vlen = Matcher::vector_length(this);
19629 uint x_idx = $idx$$constant & right_n_bits(2);
19630 if (vlen == 8) {
19631 uint y_idx = ($idx$$constant >> 2) & 1;
19632 int vlen_enc = Assembler::AVX_256bit;
19633 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19634 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19635 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19636 } else {
19637 assert(vlen == 16, "sanity");
19638 uint y_idx = ($idx$$constant >> 2) & 3;
19639 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19640 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19641 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19642 }
19643 %}
19644 ins_pipe( pipe_slow );
19645 %}
19646
19647 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19648 predicate(Matcher::vector_length(n) == 2);
19649 match(Set dst (VectorInsert (Binary dst val) idx));
19650 effect(TEMP tmp);
19651 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19652 ins_encode %{
19653 assert(UseSSE >= 4, "sanity");
19654 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19655 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19656
19657 __ movq($tmp$$Register, $val$$XMMRegister);
19658 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19659 %}
19660 ins_pipe( pipe_slow );
19661 %}
19662
19663 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19664 predicate(Matcher::vector_length(n) == 4);
19665 match(Set dst (VectorInsert (Binary src val) idx));
19666 effect(TEMP vtmp, TEMP tmp);
19667 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19668 ins_encode %{
19669 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19670 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19671
19672 uint x_idx = $idx$$constant & right_n_bits(1);
19673 uint y_idx = ($idx$$constant >> 1) & 1;
19674 int vlen_enc = Assembler::AVX_256bit;
19675 __ movq($tmp$$Register, $val$$XMMRegister);
19676 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19677 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19678 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19679 %}
19680 ins_pipe( pipe_slow );
19681 %}
19682
19683 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19684 predicate(Matcher::vector_length(n) == 8);
19685 match(Set dst (VectorInsert (Binary src val) idx));
19686 effect(TEMP tmp, TEMP vtmp);
19687 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19688 ins_encode %{
19689 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19690 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19691
19692 uint x_idx = $idx$$constant & right_n_bits(1);
19693 uint y_idx = ($idx$$constant >> 1) & 3;
19694 __ movq($tmp$$Register, $val$$XMMRegister);
19695 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19696 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19697 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19698 %}
19699 ins_pipe( pipe_slow );
19700 %}
19701
19702 // ====================REDUCTION ARITHMETIC=======================================
19703
19704 // =======================Int Reduction==========================================
19705
19706 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19707 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19708 match(Set dst (AddReductionVI src1 src2));
19709 match(Set dst (MulReductionVI src1 src2));
19710 match(Set dst (AndReductionV src1 src2));
19711 match(Set dst ( OrReductionV src1 src2));
19712 match(Set dst (XorReductionV src1 src2));
19713 match(Set dst (MinReductionV src1 src2));
19714 match(Set dst (MaxReductionV src1 src2));
19715 match(Set dst (UMinReductionV src1 src2));
19716 match(Set dst (UMaxReductionV src1 src2));
19717 effect(TEMP vtmp1, TEMP vtmp2);
19718 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19719 ins_encode %{
19720 int opcode = this->ideal_Opcode();
19721 int vlen = Matcher::vector_length(this, $src2);
19722 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19723 %}
19724 ins_pipe( pipe_slow );
19725 %}
19726
19727 // =======================Long Reduction==========================================
19728
19729 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19730 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19731 match(Set dst (AddReductionVL src1 src2));
19732 match(Set dst (MulReductionVL src1 src2));
19733 match(Set dst (AndReductionV src1 src2));
19734 match(Set dst ( OrReductionV src1 src2));
19735 match(Set dst (XorReductionV src1 src2));
19736 match(Set dst (MinReductionV src1 src2));
19737 match(Set dst (MaxReductionV src1 src2));
19738 match(Set dst (UMinReductionV src1 src2));
19739 match(Set dst (UMaxReductionV src1 src2));
19740 effect(TEMP vtmp1, TEMP vtmp2);
19741 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19742 ins_encode %{
19743 int opcode = this->ideal_Opcode();
19744 int vlen = Matcher::vector_length(this, $src2);
19745 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19746 %}
19747 ins_pipe( pipe_slow );
19748 %}
19749
19750 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19751 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19752 match(Set dst (AddReductionVL src1 src2));
19753 match(Set dst (MulReductionVL src1 src2));
19754 match(Set dst (AndReductionV src1 src2));
19755 match(Set dst ( OrReductionV src1 src2));
19756 match(Set dst (XorReductionV src1 src2));
19757 match(Set dst (MinReductionV src1 src2));
19758 match(Set dst (MaxReductionV src1 src2));
19759 match(Set dst (UMinReductionV src1 src2));
19760 match(Set dst (UMaxReductionV src1 src2));
19761 effect(TEMP vtmp1, TEMP vtmp2);
19762 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19763 ins_encode %{
19764 int opcode = this->ideal_Opcode();
19765 int vlen = Matcher::vector_length(this, $src2);
19766 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19767 %}
19768 ins_pipe( pipe_slow );
19769 %}
19770
19771 // =======================Float Reduction==========================================
19772
// Strictly ordered float add/mul reduction for vector inputs of at most 4
// elements. $dst is both the incoming accumulator and the result (and is
// declared TEMP); one vector temporary is handed to reduce_fp().
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) <= 4); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19786
// Strictly ordered float add/mul reduction for 8-element vector inputs.
// $dst is both the incoming accumulator and the result; two vector
// temporaries are handed to reduce_fp().
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19800
// Strictly ordered float add/mul reduction for 16-element vector inputs.
// Unlike the narrower variants, the vector operands use the legVec class.
// $dst is both the incoming accumulator and the result.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19814
19815
// Float add/mul reduction over a 2-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it.
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19832
// Float add/mul reduction over a 4-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it. One vector temporary is used.
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19849
// Float add/mul reduction over an 8-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it. Two vector temporaries are used.
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19866
// Float add/mul reduction over a 16-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it. Vector operands use legVec.
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19883
19884 // =======================Double Reduction==========================================
19885
// Strictly ordered double add/mul reduction for 2-element vector inputs.
// $dst is both the incoming accumulator and the result (and is declared
// TEMP); one vector temporary is handed to reduce_fp().
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19899
// Strictly ordered double add/mul reduction for 4-element vector inputs.
// $dst is both the incoming accumulator and the result; two vector
// temporaries are handed to reduce_fp().
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19913
// Strictly ordered double add/mul reduction for 8-element vector inputs.
// Vector operands use the legVec class. $dst is both the incoming
// accumulator and the result.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19927
// Double add/mul reduction over a 2-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it.
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19944
// Double add/mul reduction over a 4-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it. One vector temporary is used.
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19961
// Double add/mul reduction over an 8-element vector when strict ordering is
// NOT required. This form is meant for the VectorAPI, which permits a
// non-strictly ordered add/mul reduction. src1 carries the reduction
// identity; the encoding below never reads it. Vector operands use legVec.
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19978
19979 // =======================Byte Reduction==========================================
19980
// Byte-element vector reduction used when AVX512BW is NOT available; vector
// operands are restricted to legVec. Covers add/and/or/xor/min/max/umin/umax
// (no multiply rule here): the ideal opcode is forwarded to reduceB().
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20000
// Byte-element vector reduction used when AVX512BW IS available; vector
// operands use the vec class. Covers add/and/or/xor/min/max/umin/umax
// (no multiply rule here): the ideal opcode is forwarded to reduceB().
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20020
20021 // =======================Short Reduction==========================================
20022
// Short-element vector reduction. Covers add/mul/and/or/xor/min/max/umin/umax:
// the ideal opcode is forwarded to reduceS() together with the scalar input
// $src1, the vector input $src2 and two legVec temporaries.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // in(2) is src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceS(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20043
20044 // =======================Mul Reduction==========================================
20045
// Byte-element multiply reduction for vector inputs of at most 32 elements.
// $dst is additionally declared TEMP here, alongside the two vector
// temporaries; the work is done by mulreduceB().
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20059
// Byte-element multiply reduction for 64-element vector inputs; vector
// operands use the legVec class. $dst is additionally declared TEMP; the
// work is done by mulreduceB().
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, num_elems, $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20073
20074 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction of a 2-element vector for CPUs without AVX10.2,
// immediate form: src1 must be the constant +Inf for MinReductionV or -Inf
// for MaxReductionV and is not forwarded to the encoder. reduceFloatMinMax()
// is called with `false` as its third argument and four vector temporaries.
// Flags are killed.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20096
// Float min/max reduction of a vector with 4 or more elements for CPUs
// without AVX10.2, immediate form: src1 must be the constant +Inf for
// MinReductionV or -Inf for MaxReductionV and is not forwarded to the encoder.
// reduceFloatMinMax() is called with `false` as its third argument and five
// vector temporaries. Flags are killed.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20117
// Float min/max reduction of a 2-element vector for CPUs without AVX10.2,
// accumulator form: $dst is both the scalar input and the result.
// reduceFloatMinMax() is called with `true` as its third argument and four
// vector temporaries. Flags are killed.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20136
20137
// Float min/max reduction of a vector with 4 or more elements for CPUs
// without AVX10.2, accumulator form: $dst is both the scalar input and the
// result. reduceFloatMinMax() is called with `true` as its third argument and
// five vector temporaries. Flags are killed.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20156
// Float min/max reduction of a 2-element vector for CPUs with AVX10.2,
// immediate form: src1 must be the constant +Inf for MinReductionV or -Inf
// for MaxReductionV and is not forwarded to the encoder. Only one vector
// temporary is needed; the three unused temp slots of reduceFloatMinMax()
// are filled with xnoreg.
//
// The format label is "vector_minmax2F_reduction" so the listing text names
// the element type and lane count like the other 2-element float min/max
// rules in this file do.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20174
// Float min/max reduction of a vector with 4 or more elements for CPUs with
// AVX10.2, immediate form: src1 must be the constant +Inf for MinReductionV
// or -Inf for MaxReductionV and is not forwarded to the encoder. Two vector
// temporaries are used; the three unused temp slots of reduceFloatMinMax()
// are filled with xnoreg.
//
// The format label is "vector_minmaxF_reduction" so the listing text names
// the element type like the other >=4-element float min/max rules in this
// file do.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20192
// Float min/max reduction of a 2-element vector for CPUs with AVX10.2,
// accumulator form: $dst is both the scalar input and the result.
// reduceFloatMinMax() is called with `true` as its third argument; only one
// vector temporary is supplied and the three unused slots are xnoreg.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20208
// Float min/max reduction of a vector with 4 or more elements for CPUs with
// AVX10.2, accumulator form: $dst is both the scalar input and the result.
// reduceFloatMinMax() is called with `true` as its third argument; two vector
// temporaries are supplied and the three unused slots are xnoreg.
//
// The format label is "vector_minmaxF_reduction": this rule is selected for
// vector_length >= 4, so the "2F" label (which names the 2-element rule)
// would be misleading in the listing text.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20224
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction of a 2-element vector for CPUs without AVX10.2,
// immediate form: src1 must be the constant +Inf for MinReductionV or -Inf
// for MaxReductionV and is not forwarded to the encoder. reduceDoubleMinMax()
// is called with `false` as its third argument and four vector temporaries.
// Flags are killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20246
// Double min/max reduction of a vector with 4 or more elements for CPUs
// without AVX10.2, immediate form: src1 must be the constant +Inf for
// MinReductionV or -Inf for MaxReductionV and is not forwarded to the encoder.
// reduceDoubleMinMax() is called with `false` as its third argument and five
// vector temporaries. Flags are killed.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20267
20268
// Double min/max reduction of a 2-element vector for CPUs without AVX10.2,
// accumulator form: $dst is both the scalar input and the result.
// reduceDoubleMinMax() is called with `true` as its third argument and four
// vector temporaries. Flags are killed.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20287
// Double min/max reduction of a vector with 4 or more elements for CPUs
// without AVX10.2, accumulator form: $dst is both the scalar input and the
// result. reduceDoubleMinMax() is called with `true` as its third argument
// and five vector temporaries. Flags are killed.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20306
// Double min/max reduction of a 2-element vector for CPUs with AVX10.2,
// immediate form: src1 must be the constant +Inf for MinReductionV or -Inf
// for MaxReductionV and is not forwarded to the encoder. Only one vector
// temporary is supplied; the three unused slots of reduceDoubleMinMax() are
// xnoreg.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20324
// Double min/max reduction of a vector with 4 or more elements for CPUs with
// AVX10.2, immediate form: src1 must be the constant +Inf for MinReductionV
// or -Inf for MaxReductionV and is not forwarded to the encoder. Two vector
// temporaries are supplied; the three unused slots of reduceDoubleMinMax()
// are xnoreg.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20342
20343
// Double min/max reduction of a 2-element vector for CPUs with AVX10.2,
// accumulator form: $dst is both the scalar input and the result.
// reduceDoubleMinMax() is called with `true` as its third argument; only one
// vector temporary is supplied and the three unused slots are xnoreg.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20359
// Double min/max reduction of a vector with 4 or more elements for CPUs with
// AVX10.2, accumulator form: $dst is both the scalar input and the result.
// reduceDoubleMinMax() is called with `true` as its third argument; two
// vector temporaries are supplied and the three unused slots are xnoreg.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20375
20376 // ====================VECTOR ARITHMETIC=======================================
20377
20378 // --------------------------------- ADD --------------------------------------
20379
20380 // Bytes vector add
// Packed-byte add for the UseAVX == 0 case. Two-operand form: $dst is both
// the first addend and the result; emits paddb.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister,
             $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20390
// Packed-byte add for UseAVX > 0, register-register form. Three-operand
// vpaddb whose vector-length encoding is derived from this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20401
// Packed-byte add for UseAVX > 0 with the second addend loaded from memory
// (LoadVector folded into vpaddb). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20413
20414 // Shorts/Chars vector add
// Packed short/char add for the non-AVX case (UseAVX == 0).
// dst is the first AddVS input as well as the result; emitted as one paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    // dst += src, 16-bit lanes
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20424
// Packed short/char add, three-operand form used when UseAVX > 0.
// Emits vpaddw with the vector-length encoding derived from this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20435
// Packed short/char add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20447
20448 // Integers vector add
// Packed int add for the non-AVX case (UseAVX == 0).
// dst is the first AddVI input as well as the result; emitted as one paddd.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    // dst += src, 32-bit lanes
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20458
// Packed int add, three-operand form used when UseAVX > 0.
// Emits vpaddd with the vector-length encoding derived from this node.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20469
20470
// Packed int add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20482
20483 // Longs vector add
// Packed long add for the non-AVX case (UseAVX == 0).
// dst is the first AddVL input as well as the result; emitted as one paddq.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    // dst += src, 64-bit lanes
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20493
// Packed long add, three-operand form used when UseAVX > 0.
// Emits vpaddq with the vector-length encoding derived from this node.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20504
// Packed long add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20516
20517 // Floats vector add
// Packed float add for the non-AVX case (UseAVX == 0).
// dst is the first AddVF input as well as the result; emitted as one addps.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    // dst += src, float lanes
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20527
// Packed float add, three-operand form used when UseAVX > 0.
// Emits vaddps with the vector-length encoding derived from this node.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20538
// Packed float add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20550
20551 // Doubles vector add
// Packed double add for the non-AVX case (UseAVX == 0).
// dst is the first AddVD input as well as the result; emitted as one addpd.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    // dst += src, double lanes
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20561
// Packed double add, three-operand form used when UseAVX > 0.
// Emits vaddpd with the vector-length encoding derived from this node.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20572
// Packed double add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20584
20585 // --------------------------------- SUB --------------------------------------
20586
20587 // Bytes vector sub
// Packed byte subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVB as well as the result; emitted as one psubb.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    // dst -= src, byte lanes
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20597
// Packed byte subtract (src1 - src2), three-operand form used when UseAVX > 0.
// Emits vpsubb with the vector-length encoding derived from this node.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20608
// Packed byte subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20620
20621 // Shorts/Chars vector sub
// Packed short/char subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVS as well as the result; emitted as one psubw.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    // dst -= src, 16-bit lanes
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20631
20632
// Packed short/char subtract (src1 - src2), three-operand form used when
// UseAVX > 0. Emits vpsubw with the vector-length encoding of this node.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20643
// Packed short/char subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20655
20656 // Integers vector sub
// Packed int subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVI as well as the result; emitted as one psubd.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    // dst -= src, 32-bit lanes
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20666
// Packed int subtract (src1 - src2), three-operand form used when UseAVX > 0.
// Emits vpsubd with the vector-length encoding derived from this node.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20677
// Packed int subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20689
20690 // Longs vector sub
// Packed long subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVL as well as the result; emitted as one psubq.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    // dst -= src, 64-bit lanes
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20700
// Packed long subtract (src1 - src2), three-operand form used when UseAVX > 0.
// Emits vpsubq with the vector-length encoding derived from this node.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20711
20712
// Packed long subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20724
20725 // Floats vector sub
// Packed float subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVF as well as the result; emitted as one subps.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    // dst -= src, float lanes
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20735
// Packed float subtract (src1 - src2), three-operand form used when UseAVX > 0.
// Emits vsubps with the vector-length encoding derived from this node.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20746
// Packed float subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20758
20759 // Doubles vector sub
// Packed double subtract for the non-AVX case (UseAVX == 0).
// dst is the minuend of SubVD as well as the result; emitted as one subpd.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    // dst -= src, double lanes
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20769
// Packed double subtract (src1 - src2), three-operand form used when
// UseAVX > 0. Emits vsubpd with the vector-length encoding of this node.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20780
// Packed double subtract whose subtrahend is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20792
20793 // --------------------------------- MUL --------------------------------------
20794
20795 // Byte vector mul
// Byte vector multiply for vectors of at most 8 bytes.
// The bytes are widened to 16-bit words, multiplied as words, reduced to
// their low byte and packed back. dst and xtmp are both TEMP.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Widen both inputs from bytes to words.
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    // Word-wise product.
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Shift left then right by 8 to zero the upper byte of every word.
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    // Narrow the words back to bytes.
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20812
// Byte vector multiply for vectors wider than 8 bytes without AVX
// (UseAVX == 0). The odd-index and even-index bytes are multiplied in two
// separate 16-bit passes and then merged. dst and xtmp are both TEMP.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");

    // Pass 1 - odd-index bytes: shift each word right by 8 so the upper byte
    // becomes the operand, multiply, then shift the product back up.
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);

    // Pass 2 - even-index bytes: multiply the words as they are and keep
    // only the low byte of each product.
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);

    // Merge the two halves.
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20837
// Byte vector multiply for vectors wider than 8 bytes when UseAVX > 0.
// The odd-index and even-index bytes are multiplied in two separate 16-bit
// passes held in xtmp2 and xtmp1, then merged into dst.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);

    // Pass 1 - odd-index bytes: shift each word right by 8 so the upper byte
    // becomes the operand, multiply, then shift the product back up.
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vec_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vec_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vec_enc);

    // Pass 2 - even-index bytes: multiply the words as they are and keep
    // only the low byte of each product.
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vec_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vec_enc);

    // Merge the two halves.
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20859
20860 // Shorts/Chars vector mul
// Packed short/char multiply for the non-AVX case (UseAVX == 0).
// dst is the first MulVS input as well as the result; emitted as one pmullw.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    // dst *= src, 16-bit lanes
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20870
// Packed short/char multiply, three-operand form used when UseAVX > 0.
// Emits vpmullw with the vector-length encoding derived from this node.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20881
// Packed short/char multiply whose second operand is a LoadVector folded into
// the instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20893
20894 // Integers vector mul
// Packed int multiply for the non-AVX case (UseAVX == 0).
// dst is the first MulVI input as well as the result; emitted as one pmulld,
// which the encoding asserts needs UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // dst *= src, 32-bit lanes
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20905
// Packed int multiply, three-operand form used when UseAVX > 0.
// Emits vpmulld with the vector-length encoding derived from this node.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20916
// Packed int multiply whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20928
20929 // Longs vector mul
// Packed long multiply using evpmullq.
// Selected for 64-byte vectors when supports_avx512dq() holds, or for any
// length when supports_avx512vldq() holds. Carries ins_cost(500).
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20944
// Packed long multiply using evpmullq with the second operand taken from a
// folded LoadVector. Selected for 64-byte vectors when supports_avx512dq()
// holds, or for vectors wider than 8 bytes when supports_avx512vldq() holds.
// Carries ins_cost(500).
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 && VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20960
// Packed long multiply for the non-AVX case (UseAVX == 0), assembled from
// 32-bit multiplies. The result is (cross products << 32) + lo*lo product.
// dst and xtmp are both TEMP; carries ins_cost(500).
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");

    // Cross (lo*hi) products: only their low 32 bits matter. The 0xB1
    // shuffle swaps the two 32-bit halves of each 64-bit lane.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Move the summed cross products into the upper half of each lane.
    __ psllq($dst$$XMMRegister, 32);

    // lo*lo products, then add them in.
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20982
// Packed long multiply when UseAVX > 0, assembled from 32-bit multiplies.
// Selected for 64-byte vectors when supports_avx512dq() is false, and for
// shorter vectors when supports_avx512vldq() is false.
// The result is (cross products << 32) + lo*lo product; carries ins_cost(500).
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 && !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 && !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);

    // Cross (lo*hi) products: only their low 32 bits matter. The 0xB1
    // shuffle swaps the two 32-bit halves of each 64-bit lane.
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vec_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vec_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vec_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vec_enc);
    // Move the summed cross products into the upper half of each lane.
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vec_enc);

    // lo*lo products, then add them in.
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21007
// Packed long multiply emitted as a single vpmuludq.
// Selected when UseAVX > 0 and the MulVL node reports has_uint_inputs().
// Carries ins_cost(100).
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21019
// Packed long multiply emitted as a single vpmuldq.
// Selected when UseAVX > 0 and the MulVL node reports has_int_inputs().
// Carries ins_cost(100).
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21031
21032 // Floats vector mul
// Packed float multiply for the non-AVX case (UseAVX == 0).
// dst is the first MulVF input as well as the result; emitted as one mulps.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    // dst *= src, float lanes
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21042
// Packed float multiply, three-operand form used when UseAVX > 0.
// Emits vmulps with the vector-length encoding derived from this node.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21053
// Packed float multiply whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21065
21066 // Doubles vector mul
// Packed double multiply for the non-AVX case (UseAVX == 0).
// dst is the first MulVD input as well as the result; emitted as one mulpd.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    // dst *= src, double lanes
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21076
// Packed double multiply, three-operand form used when UseAVX > 0.
// Emits vmulpd with the vector-length encoding derived from this node.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21087
// Packed double multiply whose second operand is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21099
21100 // --------------------------------- DIV --------------------------------------
21101
21102 // Floats vector div
// Packed float divide for the non-AVX case (UseAVX == 0).
// dst is the dividend of DivVF as well as the result; emitted as one divps.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    // dst /= src, float lanes
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21112
// Packed float divide (src1 / src2), three-operand form used when UseAVX > 0.
// Emits vdivps with the vector-length encoding derived from this node.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21123
// Packed float divide whose divisor is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21135
21136 // Doubles vector div
// Packed double divide for the non-AVX case (UseAVX == 0).
// dst is the dividend of DivVD as well as the result; emitted as one divpd.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    // dst /= src, double lanes
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21146
// Packed double divide (src1 / src2), three-operand form used when UseAVX > 0.
// Emits vdivpd with the vector-length encoding derived from this node.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21157
// Packed double divide whose divisor is a LoadVector folded into the
// instruction as a memory operand. Requires UseAVX > 0 and a first input
// (n->in(1)) wider than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21169
21170 // ------------------------------ MinMax ---------------------------------------
21171
21172 // Byte, Short, Int vector Min/Max
// Vector min/max for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT) in the non-AVX case (UseAVX == 0).
// dst is the first MinV/MaxV input as well as the result.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // The ideal opcode (MinV or MaxV) and the element type are forwarded
    // to the macro assembler.
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    __ pminmax(ideal_op, bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21188
// Vector min/max for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT), three-operand form used when UseAVX > 0.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    // The ideal opcode (MinV or MaxV), element type and vector-length
    // encoding are forwarded to the macro assembler.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    const BasicType bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(ideal_op, bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21204
21205 // Long vector Min/Max
// T_LONG vector min/max for 16-byte vectors in the non-AVX case
// (UseAVX == 0). dst is an input as well as the result; note that the MaxV
// rule lists its operands as (src dst) while the MinV rule uses (dst src).
// tmp is an rxmm0 operand used as a TEMP.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(bt == T_LONG, "sanity");

    __ pminmax(ideal_op, bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21224
// T_LONG vector min/max for vectors of at most 32 bytes when UseAVX > 0 and
// supports_avx512vl() is false. Operands are legVec; dst is a TEMP.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(bt == T_LONG, "sanity");

    __ vpminmax(ideal_op, bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21242
// T_LONG vector min/max, EVEX form.
// Selected for 64-byte vectors, or for any length when supports_avx512vl()
// holds. The encoding asserts UseAVX > 2.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // Fixed: the third operand was written as "src2" without the '$' sigil, so
  // the disassembly printed the literal text instead of the register.
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    // The ideal opcode (MinV or MaxV), element type and vector-length
    // encoding are forwarded to the macro assembler.
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21261
21262 // Float/Double vector Min/Max
// Float/double (T_FLOAT, T_DOUBLE) vector min/max when supports_avx10_2()
// holds. No temporaries are declared; k0 is passed as the mask register.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp_avx10_2(ideal_op, bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21277
// Float/Double vector Min/Max for vectors of at most 32 bytes, used when
// AVX10.2 is not supported and AVX is enabled.
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const BasicType bt = Matcher::vector_element_basic_type(this);

    // Result and inputs.
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister lhs_reg = $a$$XMMRegister;
    const XMMRegister rhs_reg = $b$$XMMRegister;
    // Three scratch vector registers handed to the macro assembler.
    const XMMRegister scratch_reg = $tmp$$XMMRegister;
    const XMMRegister lhs_scratch = $atmp$$XMMRegister;
    const XMMRegister rhs_scratch = $btmp$$XMMRegister;

    __ vminmax_fp(ideal_opc, bt, dst_reg, lhs_reg, rhs_reg,
                  scratch_reg, lhs_scratch, rhs_scratch, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21300
// Float/Double vector Min/Max for 64-byte vectors when AVX10.2 is not
// supported. Uses two vector temporaries and one mask register temporary.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  // The printed form lists every TEMP declared in the effect clause,
  // including the mask register.
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    // Ideal opcode of the matched node (this rule matches MinV and MaxV).
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21321
21322 // ------------------------------ Unsigned vector Min/Max ----------------------
21323
// Unsigned vector Min/Max with register operands. Selected for every element
// type other than T_LONG, and for T_LONG as well when AVX512VL is available.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    // Ideal opcode of the matched node (this rule matches UMinV and UMaxV).
    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister lhs_reg = $a$$XMMRegister;
    const XMMRegister rhs_reg = $b$$XMMRegister;
    __ vpuminmax(ideal_opc, bt, dst_reg, lhs_reg, rhs_reg, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21338
// Unsigned vector Min/Max whose second input is a vector load folded into the
// instruction as a memory operand. Same predicate as the register form.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    // Ideal opcode of the matched node (this rule matches UMinV and UMaxV).
    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "");
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister lhs_reg = $a$$XMMRegister;
    __ vpuminmax(ideal_opc, bt, dst_reg, lhs_reg, $b$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21353
// Unsigned long vector Min/Max when AVX512VL is not available. Two vector
// temporaries are handed to the macro assembler.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  // Both temporaries carry the operand prefix so the assigned registers are
  // substituted when the instruction is printed.
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Ideal opcode of the matched node (this rule matches UMinV and UMaxV).
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21367
// Masked unsigned vector Min/Max with register operands. dst is both the
// first input and the result.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    // dst is passed twice: once as destination and once as first source.
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, dst_reg,
                   dst_reg, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21381
// Masked unsigned vector Min/Max whose second input is a vector load folded
// in as a memory operand. dst is both the first input and the result.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vector_len = vector_length_encoding(this);
    // dst is passed twice: once as destination and once as first source.
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, dst_reg,
                   dst_reg, $src2$$Address, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21395
21396 // --------------------------------- Signum/CopySign ---------------------------
21397
// Scalar float signum. dst is both input and result; the zero and one
// operands are supplied in registers. The flags register is killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    const XMMRegister value_reg = $dst$$XMMRegister;
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    // The ideal opcode is forwarded to the shared signum_fp helper.
    __ signum_fp(this->ideal_Opcode(), value_reg, zero_reg, one_reg);
  %}
  ins_pipe( pipe_slow );
%}
21408
// Scalar double signum. dst is both input and result; the zero and one
// operands are supplied in registers. The flags register is killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    const XMMRegister value_reg = $dst$$XMMRegister;
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    // The ideal opcode is forwarded to the shared signum_fp helper.
    __ signum_fp(this->ideal_Opcode(), value_reg, zero_reg, one_reg);
  %}
  ins_pipe( pipe_slow );
%}
21419
// Vector float/double signum for vectors of at most 32 bytes when AVX512VL
// is not available. Uses one vector temporary.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    // Ideal opcode of the matched node (SignumVF or SignumVD).
    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    const XMMRegister scratch_reg = $xtmp1$$XMMRegister;
    __ vector_signum_avx(ideal_opc, dst_reg, src_reg, zero_reg, one_reg, scratch_reg, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21434
// Vector float/double signum for 64-byte vectors, or for any vector length
// when AVX512VL is available. Uses one mask register temporary.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    // Ideal opcode of the matched node (SignumVF or SignumVD).
    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister zero_reg = $zero$$XMMRegister;
    const XMMRegister one_reg = $one$$XMMRegister;
    __ vector_signum_evex(ideal_opc, dst_reg, src_reg, zero_reg, one_reg, $ktmp1$$KRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21449
21450 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth table for vpternlog
21452 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21453 // C (xmm2) is set to 0x7FFFFFFF
21454 // Wherever xmm2 is 0, we want to pick from B (sign)
21455 // Wherever xmm2 is 1, we want to pick from A (src)
21456 //
21457 // A B C Result
21458 // 0 0 0 0
21459 // 0 0 1 0
21460 // 0 1 0 1
21461 // 0 1 1 0
21462 // 1 0 0 0
21463 // 1 0 1 1
21464 // 1 1 0 1
21465 // 1 1 1 1
21466 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21468 // ---------------------------------------
21469
// Scalar float copySign. dst is both the first input and the result.
// A 0x7FFFFFFF constant is built in a general register, moved into a vector
// temporary and used as the third vpternlogd operand with immediate 0xE4.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    const Register bits_reg = $tmp2$$Register;
    const XMMRegister select_reg = $tmp1$$XMMRegister;
    // Materialize the constant in the GPR, then copy it to the XMM temp.
    __ movl(bits_reg, 0x7FFFFFFF);
    __ movdl(select_reg, bits_reg);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, select_reg, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21481
// Scalar double copySign, matched with a double immediate as the last input.
// dst is both the first input and the result. A 0x7FFFFFFFFFFFFFFF constant
// is built in a general register, moved into a vector temporary and used as
// the third vpternlogq operand with immediate 0xE4.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    const Register bits_reg = $tmp2$$Register;
    const XMMRegister select_reg = $tmp1$$XMMRegister;
    // Materialize the constant in the GPR, then copy it to the XMM temp.
    __ mov64(bits_reg, 0x7FFFFFFFFFFFFFFF);
    __ movq(select_reg, bits_reg);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, select_reg, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21494
21495 //----------------------------- CompressBits/ExpandBits ------------------------
21496
// Int CompressBits with the mask in a register; emitted as pextl.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Register value_reg = $src$$Register;
    const Register mask_reg = $mask$$Register;
    __ pextl(result_reg, value_reg, mask_reg);
  %}
  ins_pipe( pipe_slow );
%}
21506
// Int ExpandBits with the mask in a register; emitted as pdepl.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Register value_reg = $src$$Register;
    const Register mask_reg = $mask$$Register;
    __ pdepl(result_reg, value_reg, mask_reg);
  %}
  ins_pipe( pipe_slow );
%}
21516
// Int CompressBits with the mask loaded from memory; the LoadI is folded
// into pextl as a memory operand.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Register value_reg = $src$$Register;
    __ pextl(result_reg, value_reg, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21526
// Int ExpandBits with the mask loaded from memory; the LoadI is folded
// into pdepl as a memory operand.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Register value_reg = $src$$Register;
    __ pdepl(result_reg, value_reg, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21536
21537 // --------------------------------- Sqrt --------------------------------------
21538
// Packed float vector sqrt, register source. Requires AVX.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    __ vsqrtps(dst_reg, src_reg, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21549
// Packed float vector sqrt with the vector load folded in as a memory
// operand. Only selected when the loaded vector is larger than 8 bytes.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ vsqrtps(dst_reg, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21561
// Packed double vector sqrt, register source. Requires AVX.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    __ vsqrtpd(dst_reg, src_reg, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21573
// Packed double vector sqrt with the vector load folded in as a memory
// operand. Only selected when the loaded vector is larger than 8 bytes.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    __ vsqrtpd(dst_reg, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21585
21586 // ------------------------------ Shift ---------------------------------------
21587
// Shift count vector. On x86 the left and right shift count vectors are the
// same (only the lowest bits of the xmm register are used for the count), so
// one rule serves both LShiftCntV and RShiftCntV.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    const XMMRegister count_vec = $dst$$XMMRegister;
    const Register count_gpr = $cnt$$Register;
    // Move the scalar count from the general register into the vector register.
    __ movdl(count_vec, count_gpr);
  %}
  ins_pipe( pipe_slow );
%}
21599
// Byte vector shift by a uniform count, for vectors of at most 8 elements.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_opc = this->ideal_Opcode();
    // The sign flag is false only for the unsigned right shift.
    const bool is_signed = !(ideal_opc == Op_URShiftVB);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister wide_reg = $tmp$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;

    // Extend the source into the temp and apply the word shift there.
    __ vextendbw(is_signed, wide_reg, $src$$XMMRegister);
    __ vshiftw(ideal_opc, wide_reg, count_reg);
    // Load the short-to-byte mask into dst, mask the shifted value and pack.
    __ movdqu(dst_reg, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(dst_reg, wide_reg);
    __ packuswb(dst_reg, dst_reg);
  %}
  ins_pipe( pipe_slow );
%}
21620
// 16-element byte vector shift by a uniform count when UseAVX is at most 1.
// The two source halves are processed separately in two temporaries.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int ideal_opc = this->ideal_Opcode();
    // The sign flag is false only for the unsigned right shift.
    const bool is_signed = !(ideal_opc == Op_URShiftVB);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister lo_reg = $tmp1$$XMMRegister;
    const XMMRegister hi_reg = $tmp2$$XMMRegister;

    // First half: extend from src and shift.
    __ vextendbw(is_signed, lo_reg, src_reg);
    __ vshiftw(ideal_opc, lo_reg, count_reg);
    // Second half: pshufd 0xE brings the upper 64 bits of src down first.
    __ pshufd(hi_reg, src_reg, 0xE);
    __ vextendbw(is_signed, hi_reg, hi_reg);
    __ vshiftw(ideal_opc, hi_reg, count_reg);
    // Mask both halves with the short-to-byte mask and pack them into dst.
    __ movdqu(dst_reg, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(hi_reg, dst_reg);
    __ pand(dst_reg, lo_reg);
    __ packuswb(dst_reg, hi_reg);
  %}
  ins_pipe( pipe_slow );
%}
21645
// 16-element byte vector shift by a uniform count when UseAVX is above 1.
// The extended value is processed in a single temporary with 256-bit
// encoding.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    // The sign flag is false only for the unsigned right shift.
    const bool is_signed = !(ideal_opc == Op_URShiftVB);
    const int wide_len = Assembler::AVX_256bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister wide_reg = $tmp$$XMMRegister;

    __ vextendbw(is_signed, wide_reg, $src$$XMMRegister, wide_len);
    __ vshiftw(ideal_opc, wide_reg, wide_reg, $shift$$XMMRegister, wide_len);
    __ vpand(wide_reg, wide_reg, ExternalAddress(vector_short_to_byte_mask()), wide_len, noreg);
    // Pull the upper 128 bits into dst, then pack both halves together.
    __ vextracti128_high(dst_reg, wide_reg);
    __ vpackuswb(dst_reg, wide_reg, dst_reg, 0);
  %}
  ins_pipe( pipe_slow );
%}
21666
// 32-element byte vector shift by a uniform count. Requires UseAVX above 1.
// The upper source half is processed in tmp and the lower half in dst.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    const int ideal_opc = this->ideal_Opcode();
    // The sign flag is false only for the unsigned right shift.
    const bool is_signed = !(ideal_opc == Op_URShiftVB);
    const int wide_len = Assembler::AVX_256bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister hi_reg = $tmp$$XMMRegister;

    // Extend the upper 128 bits of src into tmp and the lower part into dst.
    __ vextracti128_high(hi_reg, src_reg);
    __ vextendbw(is_signed, hi_reg, hi_reg, wide_len);
    __ vextendbw(is_signed, dst_reg, src_reg, wide_len);
    // Shift both halves.
    __ vshiftw(ideal_opc, hi_reg, hi_reg, count_reg, wide_len);
    __ vshiftw(ideal_opc, dst_reg, dst_reg, count_reg, wide_len);
    // Mask both halves with the short-to-byte mask.
    __ vpand(hi_reg, hi_reg, ExternalAddress(vector_short_to_byte_mask()), wide_len, noreg);
    __ vpand(dst_reg, dst_reg, ExternalAddress(vector_short_to_byte_mask()), wide_len, noreg);
    // Pack, then reorder the quadwords of the packed result with vpermq.
    __ vpackuswb(dst_reg, dst_reg, hi_reg, wide_len);
    __ vpermq(dst_reg, dst_reg, 0xD8, wide_len);
  %}
  ins_pipe( pipe_slow );
%}
21691
// 64-element byte vector shift by a uniform count. Requires UseAVX above 2.
// The two source halves are processed in tmp1 and tmp2; dst first holds the
// broadcast mask and then receives the packed result.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int ideal_opc = this->ideal_Opcode();
    // The sign flag is false only for the unsigned right shift.
    const bool is_signed = !(ideal_opc == Op_URShiftVB);
    const int wide_len = Assembler::AVX_512bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister hi_reg = $tmp1$$XMMRegister;
    const XMMRegister lo_reg = $tmp2$$XMMRegister;

    // Extend the upper 256 bits of src into tmp1 and the lower part into tmp2.
    __ vextracti64x4(hi_reg, src_reg, 1);
    __ vextendbw(is_signed, hi_reg, hi_reg, wide_len);
    __ vextendbw(is_signed, lo_reg, src_reg, wide_len);
    // Shift both halves.
    __ vshiftw(ideal_opc, hi_reg, hi_reg, count_reg, wide_len);
    __ vshiftw(ideal_opc, lo_reg, lo_reg, count_reg, wide_len);
    // Load the short-to-byte mask, broadcast it and apply it to both halves.
    __ vmovdqu(dst_reg, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(dst_reg, dst_reg, wide_len);
    __ vpand(hi_reg, hi_reg, dst_reg, wide_len);
    __ vpand(lo_reg, lo_reg, dst_reg, wide_len);
    // Pack, then permute the packed result using the byte permutation mask.
    __ vpackuswb(dst_reg, hi_reg, lo_reg, wide_len);
    __ evmovdquq(lo_reg, ExternalAddress(vector_byte_perm_mask()), wide_len, noreg);
    __ vpermq(dst_reg, lo_reg, dst_reg, wide_len);
  %}
  ins_pipe( pipe_slow );
%}
21719
// Shorts/Chars vector shift (left, right and unsigned right) by a uniform
// count.
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts the short value into an int
// with sign extension before the shift. Char vectors are fine since chars
// are unsigned values.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // AVX: three-operand form, no copy of the source needed.
      const int vector_len = vector_length_encoding(this);
      __ vshiftw(ideal_opc, dst_reg, src_reg, count_reg, vector_len);
    } else {
      // SSE: copy the source into dst with a move sized to the vector,
      // then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      switch (num_elems) {
        case 2:
          __ movflt(dst_reg, src_reg);
          break;
        case 4:
          __ movdbl(dst_reg, src_reg);
          break;
        default:
          assert (num_elems == 8, "sanity");
          __ movdqu(dst_reg, src_reg);
          break;
      }
      __ vshiftw(ideal_opc, dst_reg, count_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
21754
// Integers vector shift (left, right and unsigned right) by a uniform count.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // AVX: three-operand form, no copy of the source needed.
      const int vector_len = vector_length_encoding(this);
      __ vshiftd(ideal_opc, dst_reg, src_reg, count_reg, vector_len);
    } else {
      // SSE: copy the source into dst with a move sized to the vector,
      // then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      if (num_elems == 2) {
        __ movdbl(dst_reg, src_reg);
      } else {
        assert(num_elems == 4, "sanity");
        __ movdqu(dst_reg, src_reg);
      }
      __ vshiftd(ideal_opc, dst_reg, count_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
21782
// Integers vector shift (left, right and unsigned right) by a constant count.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    if (UseAVX > 0) {
      // AVX: three-operand form, no copy of the source needed.
      const int vlen_enc = vector_length_encoding(this);
      __ vshiftd_imm(ideal_opc, dst_reg, src_reg, $shift$$constant, vlen_enc);
    } else {
      // SSE: copy the source into dst with a move sized to the vector,
      // then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      if (num_elems == 2) {
        __ movdbl(dst_reg, src_reg);
      } else {
        assert(num_elems == 4, "sanity");
        __ movdqu(dst_reg, src_reg);
      }
      __ vshiftd_imm(ideal_opc, dst_reg, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21808
// Longs vector shift (left and unsigned right) by a uniform count.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    if (UseAVX <= 0) {
      // SSE: only the two-element vector is expected here; copy the source
      // into dst and shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst_reg, src_reg);
      __ vshiftq(ideal_opc, dst_reg, count_reg);
    } else {
      // AVX: three-operand form.
      const int vector_len = vector_length_encoding(this);
      __ vshiftq(ideal_opc, dst_reg, src_reg, count_reg, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}
21829
// Longs vector shift (left and unsigned right) by a constant count.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    if (UseAVX <= 0) {
      // SSE: only the two-element vector is expected here; copy the source
      // into dst and shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst_reg, src_reg);
      __ vshiftq_imm(ideal_opc, dst_reg, $shift$$constant);
    } else {
      // AVX: three-operand form.
      const int vlen_enc = vector_length_encoding(this);
      __ vshiftq_imm(ideal_opc, dst_reg, src_reg, $shift$$constant, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21848
21849 // -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift by a uniform count when UseAVX is at
// most 2. Both paths emit the same sequence: logical right shift of the
// source, logical right shift of the sign mask constant by the same count,
// then xor and subtract of the shifted mask.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const uint num_elems = Matcher::vector_length(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister mask_reg = $tmp$$XMMRegister;
    if (num_elems != 2) {
      // Four-element vector: 256-bit AVX2 sequence.
      assert(num_elems == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int wide_len = Assembler::AVX_256bit;
      __ vpsrlq(dst_reg, src_reg, count_reg, wide_len);
      __ vmovdqu(mask_reg, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(mask_reg, mask_reg, count_reg, wide_len);
      __ vpxor(dst_reg, dst_reg, mask_reg, wide_len);
      __ vpsubq(dst_reg, dst_reg, mask_reg, wide_len);
    } else {
      // Two-element vector: SSE two-operand sequence on a copy of the source.
      __ movdqu(dst_reg, src_reg);
      __ psrlq(dst_reg, count_reg);
      __ movdqu(mask_reg, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(mask_reg, count_reg);
      __ pxor(dst_reg, mask_reg);
      __ psubq(dst_reg, mask_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
21878
// Long vector arithmetic right shift by a uniform count when UseAVX is
// above 2; emitted as a single evpsraq.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    __ evpsraq(dst_reg, src_reg, count_reg, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
21889
21890 // ------------------- Variable Shift -----------------------------
// Byte variable (per-element count) shift for vectors of at most 8 elements
// when AVX512BW is not available.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = Assembler::AVX_128bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    // Shift into dst, then pack dst with itself.
    __ varshiftbw(ideal_opc, dst_reg, $src$$XMMRegister, $shift$$XMMRegister, vector_len, $vtmp$$XMMRegister);
    __ vpackuswb(dst_reg, dst_reg, dst_reg, 0);
  %}
  ins_pipe( pipe_slow );
%}
21911
// Byte variable (per-element count) shift for 16-element vectors when
// AVX512BW is not available. Lower and upper halves are shifted separately
// and then packed together.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = Assembler::AVX_128bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister hi_val = $vtmp1$$XMMRegister;
    const XMMRegister hi_cnt = $vtmp2$$XMMRegister;

    // Lower half: word result lands in dst.
    __ varshiftbw(ideal_opc, dst_reg, src_reg, count_reg, vector_len, hi_val);

    // Upper half: bring the upper parts of src and shift down with vpshufd,
    // word result lands in vtmp1.
    __ vpshufd(hi_val, src_reg, 0xE, 0);
    __ vpshufd(hi_cnt, count_reg, 0xE, 0);
    __ varshiftbw(ideal_opc, hi_val, hi_val, hi_cnt, vector_len, hi_cnt);

    // Merge and down convert the two word results to byte in dst.
    __ vpackuswb(dst_reg, dst_reg, hi_val, 0);
  %}
  ins_pipe( pipe_slow );
%}
21939
// Byte variable (per-element count) shift for 32-element vectors when
// AVX512BW is not available. Each 128-bit half of the source is processed
// like the 16-element case and the two results are merged at the end.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  // Same annotation marker before the TEMP list as the other variable byte
  // shift rules.
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21975
// Byte variable (per-element count) shift for vectors of at most 32 elements
// when AVX512BW is available.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int vector_len = vector_length_encoding(this);
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister scratch_reg = $vtmp$$XMMRegister;
    __ evarshiftb(ideal_opc, dst_reg, src_reg, count_reg, vector_len, scratch_reg);
  %}
  ins_pipe( pipe_slow );
%}
21994
// Byte variable (per-element count) shift for 64-element vectors when
// AVX512BW is available. The lower and upper 256-bit halves are shifted
// separately with 256-bit encoding and then combined.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_opc = this->ideal_Opcode();
    const int half_len = Assembler::AVX_256bit;
    const XMMRegister dst_reg = $dst$$XMMRegister;
    const XMMRegister src_reg = $src$$XMMRegister;
    const XMMRegister count_reg = $shift$$XMMRegister;
    const XMMRegister hi_val = $vtmp1$$XMMRegister;
    const XMMRegister hi_cnt = $vtmp2$$XMMRegister;

    // Lower half: result lands in dst.
    __ evarshiftb(ideal_opc, dst_reg, src_reg, count_reg, half_len, hi_val);
    // Upper half: extract the high parts of src and shift, result in vtmp1.
    __ vextracti64x4_high(hi_val, src_reg);
    __ vextracti64x4_high(hi_cnt, count_reg);
    __ evarshiftb(ideal_opc, hi_val, hi_val, hi_cnt, half_len, hi_cnt);
    // Insert the upper result into position 1 of dst.
    __ vinserti64x4(dst_reg, dst_reg, hi_val, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
22017
22018 // Short variable shift
22019 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
22020 predicate(Matcher::vector_length(n) <= 8 &&
22021 n->as_ShiftV()->is_var_shift() &&
22022 !VM_Version::supports_avx512bw());
22023 match(Set dst ( LShiftVS src shift));
22024 match(Set dst ( RShiftVS src shift));
22025 match(Set dst (URShiftVS src shift));
22026 effect(TEMP dst, TEMP vtmp);
22027 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22028 ins_encode %{
22029 assert(UseAVX >= 2, "required");
22030
22031 int opcode = this->ideal_Opcode();
22032 bool sign = (opcode != Op_URShiftVS);
22033 int vlen_enc = Assembler::AVX_256bit;
22034 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
22035 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
22036 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22037 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22038 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
22039 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22040 %}
22041 ins_pipe( pipe_slow );
22042 %}
22043
22044 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
22045 predicate(Matcher::vector_length(n) == 16 &&
22046 n->as_ShiftV()->is_var_shift() &&
22047 !VM_Version::supports_avx512bw());
22048 match(Set dst ( LShiftVS src shift));
22049 match(Set dst ( RShiftVS src shift));
22050 match(Set dst (URShiftVS src shift));
22051 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22052 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
22053 ins_encode %{
22054 assert(UseAVX >= 2, "required");
22055
22056 int opcode = this->ideal_Opcode();
22057 bool sign = (opcode != Op_URShiftVS);
22058 int vlen_enc = Assembler::AVX_256bit;
22059 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
22060 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
22061 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22062 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22063 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22064
22065 // Shift upper half, with result in dst using vtmp1 as TEMP
22066 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
22067 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
22068 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22069 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22070 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
22071 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22072
22073 // Merge lower and upper half result into dst
22074 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22075 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
22076 %}
22077 ins_pipe( pipe_slow );
22078 %}
22079
22080 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
22081 predicate(n->as_ShiftV()->is_var_shift() &&
22082 VM_Version::supports_avx512bw());
22083 match(Set dst ( LShiftVS src shift));
22084 match(Set dst ( RShiftVS src shift));
22085 match(Set dst (URShiftVS src shift));
22086 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
22087 ins_encode %{
22088 assert(UseAVX > 2, "required");
22089
22090 int opcode = this->ideal_Opcode();
22091 int vlen_enc = vector_length_encoding(this);
22092 if (!VM_Version::supports_avx512vl()) {
22093 vlen_enc = Assembler::AVX_512bit;
22094 }
22095 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22096 %}
22097 ins_pipe( pipe_slow );
22098 %}
22099
22100 //Integer variable shift
22101 instruct vshiftI_var(vec dst, vec src, vec shift) %{
22102 predicate(n->as_ShiftV()->is_var_shift());
22103 match(Set dst ( LShiftVI src shift));
22104 match(Set dst ( RShiftVI src shift));
22105 match(Set dst (URShiftVI src shift));
22106 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
22107 ins_encode %{
22108 assert(UseAVX >= 2, "required");
22109
22110 int opcode = this->ideal_Opcode();
22111 int vlen_enc = vector_length_encoding(this);
22112 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22113 %}
22114 ins_pipe( pipe_slow );
22115 %}
22116
22117 //Long variable shift
22118 instruct vshiftL_var(vec dst, vec src, vec shift) %{
22119 predicate(n->as_ShiftV()->is_var_shift());
22120 match(Set dst ( LShiftVL src shift));
22121 match(Set dst (URShiftVL src shift));
22122 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
22123 ins_encode %{
22124 assert(UseAVX >= 2, "required");
22125
22126 int opcode = this->ideal_Opcode();
22127 int vlen_enc = vector_length_encoding(this);
22128 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22129 %}
22130 ins_pipe( pipe_slow );
22131 %}
22132
22133 //Long variable right shift arithmetic
22134 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
22135 predicate(Matcher::vector_length(n) <= 4 &&
22136 n->as_ShiftV()->is_var_shift() &&
22137 UseAVX == 2);
22138 match(Set dst (RShiftVL src shift));
22139 effect(TEMP dst, TEMP vtmp);
22140 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
22141 ins_encode %{
22142 int opcode = this->ideal_Opcode();
22143 int vlen_enc = vector_length_encoding(this);
22144 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
22145 $vtmp$$XMMRegister);
22146 %}
22147 ins_pipe( pipe_slow );
22148 %}
22149
22150 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
22151 predicate(n->as_ShiftV()->is_var_shift() &&
22152 UseAVX > 2);
22153 match(Set dst (RShiftVL src shift));
22154 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
22155 ins_encode %{
22156 int opcode = this->ideal_Opcode();
22157 int vlen_enc = vector_length_encoding(this);
22158 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
22159 %}
22160 ins_pipe( pipe_slow );
22161 %}
22162
22163 // --------------------------------- AND --------------------------------------
22164
22165 instruct vand(vec dst, vec src) %{
22166 predicate(UseAVX == 0);
22167 match(Set dst (AndV dst src));
22168 format %{ "pand $dst,$src\t! and vectors" %}
22169 ins_encode %{
22170 __ pand($dst$$XMMRegister, $src$$XMMRegister);
22171 %}
22172 ins_pipe( pipe_slow );
22173 %}
22174
22175 instruct vand_reg(vec dst, vec src1, vec src2) %{
22176 predicate(UseAVX > 0);
22177 match(Set dst (AndV src1 src2));
22178 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
22179 ins_encode %{
22180 int vlen_enc = vector_length_encoding(this);
22181 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22182 %}
22183 ins_pipe( pipe_slow );
22184 %}
22185
22186 instruct vand_mem(vec dst, vec src, memory mem) %{
22187 predicate((UseAVX > 0) &&
22188 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22189 match(Set dst (AndV src (LoadVector mem)));
22190 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
22191 ins_encode %{
22192 int vlen_enc = vector_length_encoding(this);
22193 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22194 %}
22195 ins_pipe( pipe_slow );
22196 %}
22197
22198 // --------------------------------- OR ---------------------------------------
22199
22200 instruct vor(vec dst, vec src) %{
22201 predicate(UseAVX == 0);
22202 match(Set dst (OrV dst src));
22203 format %{ "por $dst,$src\t! or vectors" %}
22204 ins_encode %{
22205 __ por($dst$$XMMRegister, $src$$XMMRegister);
22206 %}
22207 ins_pipe( pipe_slow );
22208 %}
22209
22210 instruct vor_reg(vec dst, vec src1, vec src2) %{
22211 predicate(UseAVX > 0);
22212 match(Set dst (OrV src1 src2));
22213 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
22214 ins_encode %{
22215 int vlen_enc = vector_length_encoding(this);
22216 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22217 %}
22218 ins_pipe( pipe_slow );
22219 %}
22220
22221 instruct vor_mem(vec dst, vec src, memory mem) %{
22222 predicate((UseAVX > 0) &&
22223 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22224 match(Set dst (OrV src (LoadVector mem)));
22225 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
22226 ins_encode %{
22227 int vlen_enc = vector_length_encoding(this);
22228 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22229 %}
22230 ins_pipe( pipe_slow );
22231 %}
22232
22233 // --------------------------------- XOR --------------------------------------
22234
22235 instruct vxor(vec dst, vec src) %{
22236 predicate(UseAVX == 0);
22237 match(Set dst (XorV dst src));
22238 format %{ "pxor $dst,$src\t! xor vectors" %}
22239 ins_encode %{
22240 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
22241 %}
22242 ins_pipe( pipe_slow );
22243 %}
22244
22245 instruct vxor_reg(vec dst, vec src1, vec src2) %{
22246 predicate(UseAVX > 0);
22247 match(Set dst (XorV src1 src2));
22248 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
22249 ins_encode %{
22250 int vlen_enc = vector_length_encoding(this);
22251 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22252 %}
22253 ins_pipe( pipe_slow );
22254 %}
22255
22256 instruct vxor_mem(vec dst, vec src, memory mem) %{
22257 predicate((UseAVX > 0) &&
22258 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
22259 match(Set dst (XorV src (LoadVector mem)));
22260 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
22261 ins_encode %{
22262 int vlen_enc = vector_length_encoding(this);
22263 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
22264 %}
22265 ins_pipe( pipe_slow );
22266 %}
22267
22268 // --------------------------------- VectorCast --------------------------------------
22269
22270 instruct vcastBtoX(vec dst, vec src) %{
22271 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
22272 match(Set dst (VectorCastB2X src));
22273 format %{ "vector_cast_b2x $dst,$src\t!" %}
22274 ins_encode %{
22275 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22276 int vlen_enc = vector_length_encoding(this);
22277 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22278 %}
22279 ins_pipe( pipe_slow );
22280 %}
22281
22282 instruct vcastBtoD(legVec dst, legVec src) %{
22283 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
22284 match(Set dst (VectorCastB2X src));
22285 format %{ "vector_cast_b2x $dst,$src\t!" %}
22286 ins_encode %{
22287 int vlen_enc = vector_length_encoding(this);
22288 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22289 %}
22290 ins_pipe( pipe_slow );
22291 %}
22292
22293 instruct castStoX(vec dst, vec src) %{
22294 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22295 Matcher::vector_length(n->in(1)) <= 8 && // src
22296 Matcher::vector_element_basic_type(n) == T_BYTE);
22297 match(Set dst (VectorCastS2X src));
22298 format %{ "vector_cast_s2x $dst,$src" %}
22299 ins_encode %{
22300 assert(UseAVX > 0, "required");
22301
22302 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
22303 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
22304 %}
22305 ins_pipe( pipe_slow );
22306 %}
22307
22308 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22309 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22310 Matcher::vector_length(n->in(1)) == 16 && // src
22311 Matcher::vector_element_basic_type(n) == T_BYTE);
22312 effect(TEMP dst, TEMP vtmp);
22313 match(Set dst (VectorCastS2X src));
22314 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22315 ins_encode %{
22316 assert(UseAVX > 0, "required");
22317
22318 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22319 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22320 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22321 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22322 %}
22323 ins_pipe( pipe_slow );
22324 %}
22325
22326 instruct vcastStoX_evex(vec dst, vec src) %{
22327 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22328 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22329 match(Set dst (VectorCastS2X src));
22330 format %{ "vector_cast_s2x $dst,$src\t!" %}
22331 ins_encode %{
22332 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22333 int src_vlen_enc = vector_length_encoding(this, $src);
22334 int vlen_enc = vector_length_encoding(this);
22335 switch (to_elem_bt) {
22336 case T_BYTE:
22337 if (!VM_Version::supports_avx512vl()) {
22338 vlen_enc = Assembler::AVX_512bit;
22339 }
22340 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22341 break;
22342 case T_INT:
22343 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22344 break;
22345 case T_FLOAT:
22346 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22347 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22348 break;
22349 case T_LONG:
22350 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22351 break;
22352 case T_DOUBLE: {
22353 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22354 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22355 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22356 break;
22357 }
22358 default:
22359 ShouldNotReachHere();
22360 }
22361 %}
22362 ins_pipe( pipe_slow );
22363 %}
22364
22365 instruct castItoX(vec dst, vec src) %{
22366 predicate(UseAVX <= 2 &&
22367 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22368 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22369 match(Set dst (VectorCastI2X src));
22370 format %{ "vector_cast_i2x $dst,$src" %}
22371 ins_encode %{
22372 assert(UseAVX > 0, "required");
22373
22374 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22375 int vlen_enc = vector_length_encoding(this, $src);
22376
22377 if (to_elem_bt == T_BYTE) {
22378 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22379 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22380 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22381 } else {
22382 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22383 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22384 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22385 }
22386 %}
22387 ins_pipe( pipe_slow );
22388 %}
22389
22390 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22391 predicate(UseAVX <= 2 &&
22392 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22393 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22394 match(Set dst (VectorCastI2X src));
22395 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22396 effect(TEMP dst, TEMP vtmp);
22397 ins_encode %{
22398 assert(UseAVX > 0, "required");
22399
22400 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22401 int vlen_enc = vector_length_encoding(this, $src);
22402
22403 if (to_elem_bt == T_BYTE) {
22404 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22405 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22406 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22407 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22408 } else {
22409 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22410 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22411 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22412 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22413 }
22414 %}
22415 ins_pipe( pipe_slow );
22416 %}
22417
22418 instruct vcastItoX_evex(vec dst, vec src) %{
22419 predicate(UseAVX > 2 ||
22420 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22421 match(Set dst (VectorCastI2X src));
22422 format %{ "vector_cast_i2x $dst,$src\t!" %}
22423 ins_encode %{
22424 assert(UseAVX > 0, "required");
22425
22426 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22427 int src_vlen_enc = vector_length_encoding(this, $src);
22428 int dst_vlen_enc = vector_length_encoding(this);
22429 switch (dst_elem_bt) {
22430 case T_BYTE:
22431 if (!VM_Version::supports_avx512vl()) {
22432 src_vlen_enc = Assembler::AVX_512bit;
22433 }
22434 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22435 break;
22436 case T_SHORT:
22437 if (!VM_Version::supports_avx512vl()) {
22438 src_vlen_enc = Assembler::AVX_512bit;
22439 }
22440 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22441 break;
22442 case T_FLOAT:
22443 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22444 break;
22445 case T_LONG:
22446 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22447 break;
22448 case T_DOUBLE:
22449 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22450 break;
22451 default:
22452 ShouldNotReachHere();
22453 }
22454 %}
22455 ins_pipe( pipe_slow );
22456 %}
22457
22458 instruct vcastLtoBS(vec dst, vec src) %{
22459 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22460 UseAVX <= 2);
22461 match(Set dst (VectorCastL2X src));
22462 format %{ "vector_cast_l2x $dst,$src" %}
22463 ins_encode %{
22464 assert(UseAVX > 0, "required");
22465
22466 int vlen = Matcher::vector_length_in_bytes(this, $src);
22467 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22468 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22469 : ExternalAddress(vector_int_to_short_mask());
22470 if (vlen <= 16) {
22471 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22472 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22473 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22474 } else {
22475 assert(vlen <= 32, "required");
22476 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22477 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22478 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22479 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22480 }
22481 if (to_elem_bt == T_BYTE) {
22482 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22483 }
22484 %}
22485 ins_pipe( pipe_slow );
22486 %}
22487
22488 instruct vcastLtoX_evex(vec dst, vec src) %{
22489 predicate(UseAVX > 2 ||
22490 (Matcher::vector_element_basic_type(n) == T_INT ||
22491 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22492 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22493 match(Set dst (VectorCastL2X src));
22494 format %{ "vector_cast_l2x $dst,$src\t!" %}
22495 ins_encode %{
22496 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22497 int vlen = Matcher::vector_length_in_bytes(this, $src);
22498 int vlen_enc = vector_length_encoding(this, $src);
22499 switch (to_elem_bt) {
22500 case T_BYTE:
22501 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22502 vlen_enc = Assembler::AVX_512bit;
22503 }
22504 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22505 break;
22506 case T_SHORT:
22507 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22508 vlen_enc = Assembler::AVX_512bit;
22509 }
22510 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22511 break;
22512 case T_INT:
22513 if (vlen == 8) {
22514 if ($dst$$XMMRegister != $src$$XMMRegister) {
22515 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22516 }
22517 } else if (vlen == 16) {
22518 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22519 } else if (vlen == 32) {
22520 if (UseAVX > 2) {
22521 if (!VM_Version::supports_avx512vl()) {
22522 vlen_enc = Assembler::AVX_512bit;
22523 }
22524 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22525 } else {
22526 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22527 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22528 }
22529 } else { // vlen == 64
22530 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22531 }
22532 break;
22533 case T_FLOAT:
22534 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22535 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22536 break;
22537 case T_DOUBLE:
22538 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22539 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22540 break;
22541
22542 default: assert(false, "%s", type2name(to_elem_bt));
22543 }
22544 %}
22545 ins_pipe( pipe_slow );
22546 %}
22547
22548 instruct vcastFtoD_reg(vec dst, vec src) %{
22549 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22550 match(Set dst (VectorCastF2X src));
22551 format %{ "vector_cast_f2d $dst,$src\t!" %}
22552 ins_encode %{
22553 int vlen_enc = vector_length_encoding(this);
22554 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22555 %}
22556 ins_pipe( pipe_slow );
22557 %}
22558
22559
22560 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22561 predicate(!VM_Version::supports_avx10_2() &&
22562 !VM_Version::supports_avx512vl() &&
22563 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22564 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22565 is_integral_type(Matcher::vector_element_basic_type(n)));
22566 match(Set dst (VectorCastF2X src));
22567 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22568 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22569 ins_encode %{
22570 int vlen_enc = vector_length_encoding(this, $src);
22571 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22572 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
22573 // 32 bit addresses for register indirect addressing mode since stub constants
22574 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
22575 // However, targets are free to increase this limit, but having a large code cache size
22576 // greater than 2G looks unreasonable in practical scenario, on the hind side with given
22577 // cap we save a temporary register allocation which in limiting case can prevent
22578 // spilling in high register pressure blocks.
22579 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22580 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22581 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22582 %}
22583 ins_pipe( pipe_slow );
22584 %}
22585
22586 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22587 predicate(!VM_Version::supports_avx10_2() &&
22588 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22589 is_integral_type(Matcher::vector_element_basic_type(n)));
22590 match(Set dst (VectorCastF2X src));
22591 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22592 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22593 ins_encode %{
22594 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22595 if (to_elem_bt == T_LONG) {
22596 int vlen_enc = vector_length_encoding(this);
22597 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22598 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22599 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22600 } else {
22601 int vlen_enc = vector_length_encoding(this, $src);
22602 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22603 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22604 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22605 }
22606 %}
22607 ins_pipe( pipe_slow );
22608 %}
22609
22610 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22611 predicate(VM_Version::supports_avx10_2() &&
22612 is_integral_type(Matcher::vector_element_basic_type(n)));
22613 match(Set dst (VectorCastF2X src));
22614 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22615 ins_encode %{
22616 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22617 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22618 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619 %}
22620 ins_pipe( pipe_slow );
22621 %}
22622
22623 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22624 predicate(VM_Version::supports_avx10_2() &&
22625 is_integral_type(Matcher::vector_element_basic_type(n)));
22626 match(Set dst (VectorCastF2X (LoadVector src)));
22627 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22628 ins_encode %{
22629 int vlen = Matcher::vector_length(this);
22630 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22631 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22632 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22633 %}
22634 ins_pipe( pipe_slow );
22635 %}
22636
22637 instruct vcastDtoF_reg(vec dst, vec src) %{
22638 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22639 match(Set dst (VectorCastD2X src));
22640 format %{ "vector_cast_d2x $dst,$src\t!" %}
22641 ins_encode %{
22642 int vlen_enc = vector_length_encoding(this, $src);
22643 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22644 %}
22645 ins_pipe( pipe_slow );
22646 %}
22647
22648 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22649 predicate(!VM_Version::supports_avx10_2() &&
22650 !VM_Version::supports_avx512vl() &&
22651 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22652 is_integral_type(Matcher::vector_element_basic_type(n)));
22653 match(Set dst (VectorCastD2X src));
22654 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22655 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22656 ins_encode %{
22657 int vlen_enc = vector_length_encoding(this, $src);
22658 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22659 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22660 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22661 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22662 %}
22663 ins_pipe( pipe_slow );
22664 %}
22665
22666 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22667 predicate(!VM_Version::supports_avx10_2() &&
22668 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22669 is_integral_type(Matcher::vector_element_basic_type(n)));
22670 match(Set dst (VectorCastD2X src));
22671 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22672 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22673 ins_encode %{
22674 int vlen_enc = vector_length_encoding(this, $src);
22675 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22676 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22677 ExternalAddress(vector_float_signflip());
22678 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22679 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22680 %}
22681 ins_pipe( pipe_slow );
22682 %}
22683
22684 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22685 predicate(VM_Version::supports_avx10_2() &&
22686 is_integral_type(Matcher::vector_element_basic_type(n)));
22687 match(Set dst (VectorCastD2X src));
22688 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22689 ins_encode %{
22690 int vlen_enc = vector_length_encoding(this, $src);
22691 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22692 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22693 %}
22694 ins_pipe( pipe_slow );
22695 %}
22696
22697 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22698 predicate(VM_Version::supports_avx10_2() &&
22699 is_integral_type(Matcher::vector_element_basic_type(n)));
22700 match(Set dst (VectorCastD2X (LoadVector src)));
22701 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22702 ins_encode %{
22703 int vlen = Matcher::vector_length(this);
22704 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22705 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22706 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22707 %}
22708 ins_pipe( pipe_slow );
22709 %}
22710
22711 instruct vucast(vec dst, vec src) %{
22712 match(Set dst (VectorUCastB2X src));
22713 match(Set dst (VectorUCastS2X src));
22714 match(Set dst (VectorUCastI2X src));
22715 format %{ "vector_ucast $dst,$src\t!" %}
22716 ins_encode %{
22717 assert(UseAVX > 0, "required");
22718
22719 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22720 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22721 int vlen_enc = vector_length_encoding(this);
22722 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22723 %}
22724 ins_pipe( pipe_slow );
22725 %}
22726
22727 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22728 predicate(!VM_Version::supports_avx512vl() &&
22729 Matcher::vector_length_in_bytes(n) < 64 &&
22730 Matcher::vector_element_basic_type(n) == T_INT);
22731 match(Set dst (RoundVF src));
22732 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22733 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22734 ins_encode %{
22735 int vlen_enc = vector_length_encoding(this);
22736 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22737 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22738 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22739 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22740 %}
22741 ins_pipe( pipe_slow );
22742 %}
22743
22744 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22745 predicate((VM_Version::supports_avx512vl() ||
22746 Matcher::vector_length_in_bytes(n) == 64) &&
22747 Matcher::vector_element_basic_type(n) == T_INT);
22748 match(Set dst (RoundVF src));
22749 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22750 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22751 ins_encode %{
22752 int vlen_enc = vector_length_encoding(this);
22753 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22754 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22755 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22756 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22757 %}
22758 ins_pipe( pipe_slow );
22759 %}
22760
22761 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22762 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22763 match(Set dst (RoundVD src));
22764 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22765 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22766 ins_encode %{
22767 int vlen_enc = vector_length_encoding(this);
22768 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22769 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22770 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22771 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22772 %}
22773 ins_pipe( pipe_slow );
22774 %}
22775
22776 // --------------------------------- VectorMaskCmp --------------------------------------
22777
22778 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22779 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22780 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22781 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22782 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22783 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22784 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22785 ins_encode %{
22786 int vlen_enc = vector_length_encoding(this, $src1);
22787 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22788 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22789 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22790 } else {
22791 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22792 }
22793 %}
22794 ins_pipe( pipe_slow );
22795 %}
22796
22797 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22798 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22799 n->bottom_type()->isa_pvectmask() == nullptr &&
22800 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22801 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22802 effect(TEMP ktmp);
22803 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22804 ins_encode %{
22805 int vlen_enc = Assembler::AVX_512bit;
22806 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22807 KRegister mask = k0; // The comparison itself is not being masked.
22808 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22809 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22810 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22811 } else {
22812 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22813 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22814 }
22815 %}
22816 ins_pipe( pipe_slow );
22817 %}
22818
22819 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22820 predicate(n->bottom_type()->isa_pvectmask() &&
22821 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22822 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22823 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22824 ins_encode %{
22825 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22826 int vlen_enc = vector_length_encoding(this, $src1);
22827 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22828 KRegister mask = k0; // The comparison itself is not being masked.
22829 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22830 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22831 } else {
22832 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22833 }
22834 %}
22835 ins_pipe( pipe_slow );
22836 %}
22837
22838 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22839 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22840 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22841 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22842 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22843 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22844 (n->in(2)->get_int() == BoolTest::eq ||
22845 n->in(2)->get_int() == BoolTest::lt ||
22846 n->in(2)->get_int() == BoolTest::gt)); // cond
22847 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22848 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22849 ins_encode %{
22850 int vlen_enc = vector_length_encoding(this, $src1);
22851 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22852 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22853 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22854 %}
22855 ins_pipe( pipe_slow );
22856 %}
22857
22858 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22859 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22860 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22861 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22862 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22863 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22864 (n->in(2)->get_int() == BoolTest::ne ||
22865 n->in(2)->get_int() == BoolTest::le ||
22866 n->in(2)->get_int() == BoolTest::ge)); // cond
22867 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22868 effect(TEMP dst, TEMP xtmp);
22869 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22870 ins_encode %{
22871 int vlen_enc = vector_length_encoding(this, $src1);
22872 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22873 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22874 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22875 %}
22876 ins_pipe( pipe_slow );
22877 %}
22878
22879 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22880 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22881 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22882 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22883 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22884 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22885 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22886 effect(TEMP dst, TEMP xtmp);
22887 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22888 ins_encode %{
22889 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22890 int vlen_enc = vector_length_encoding(this, $src1);
22891 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22892 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22893
22894 if (vlen_enc == Assembler::AVX_128bit) {
22895 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22896 } else {
22897 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22898 }
22899 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22900 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22901 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22902 %}
22903 ins_pipe( pipe_slow );
22904 %}
22905
22906 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22907 predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22908 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22909 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22910 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22911 effect(TEMP ktmp);
22912 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22913 ins_encode %{
22914 assert(UseAVX > 2, "required");
22915
22916 int vlen_enc = vector_length_encoding(this, $src1);
22917 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22918 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22919 KRegister mask = k0; // The comparison itself is not being masked.
22920 bool merge = false;
22921 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22922
22923 switch (src1_elem_bt) {
22924 case T_INT: {
22925 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22926 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22927 break;
22928 }
22929 case T_LONG: {
22930 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22931 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22932 break;
22933 }
22934 default: assert(false, "%s", type2name(src1_elem_bt));
22935 }
22936 %}
22937 ins_pipe( pipe_slow );
22938 %}
22939
22940
22941 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22942 predicate(n->bottom_type()->isa_pvectmask() &&
22943 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22944 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22945 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22946 ins_encode %{
22947 assert(UseAVX > 2, "required");
22948 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22949
22950 int vlen_enc = vector_length_encoding(this, $src1);
22951 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22952 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22953 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22954
22955 // Comparison i
22956 switch (src1_elem_bt) {
22957 case T_BYTE: {
22958 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22959 break;
22960 }
22961 case T_SHORT: {
22962 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22963 break;
22964 }
22965 case T_INT: {
22966 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22967 break;
22968 }
22969 case T_LONG: {
22970 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22971 break;
22972 }
22973 default: assert(false, "%s", type2name(src1_elem_bt));
22974 }
22975 %}
22976 ins_pipe( pipe_slow );
22977 %}
22978
22979 // Extract
22980
22981 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22982 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22983 match(Set dst (ExtractI src idx));
22984 match(Set dst (ExtractS src idx));
22985 match(Set dst (ExtractB src idx));
22986 format %{ "extractI $dst,$src,$idx\t!" %}
22987 ins_encode %{
22988 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22989
22990 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22991 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22992 %}
22993 ins_pipe( pipe_slow );
22994 %}
22995
22996 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22997 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22998 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22999 match(Set dst (ExtractI src idx));
23000 match(Set dst (ExtractS src idx));
23001 match(Set dst (ExtractB src idx));
23002 effect(TEMP vtmp);
23003 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
23004 ins_encode %{
23005 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23006
23007 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23008 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23009 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
23010 %}
23011 ins_pipe( pipe_slow );
23012 %}
23013
23014 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
23015 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
23016 match(Set dst (ExtractL src idx));
23017 format %{ "extractL $dst,$src,$idx\t!" %}
23018 ins_encode %{
23019 assert(UseSSE >= 4, "required");
23020 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23021
23022 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
23023 %}
23024 ins_pipe( pipe_slow );
23025 %}
23026
23027 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
23028 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23029 Matcher::vector_length(n->in(1)) == 8); // src
23030 match(Set dst (ExtractL src idx));
23031 effect(TEMP vtmp);
23032 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
23033 ins_encode %{
23034 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23035
23036 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23037 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
23038 %}
23039 ins_pipe( pipe_slow );
23040 %}
23041
23042 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23043 predicate(Matcher::vector_length(n->in(1)) <= 4);
23044 match(Set dst (ExtractF src idx));
23045 effect(TEMP dst, TEMP vtmp);
23046 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23047 ins_encode %{
23048 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23049
23050 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
23051 %}
23052 ins_pipe( pipe_slow );
23053 %}
23054
23055 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
23056 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
23057 Matcher::vector_length(n->in(1)/*src*/) == 16);
23058 match(Set dst (ExtractF src idx));
23059 effect(TEMP vtmp);
23060 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
23061 ins_encode %{
23062 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23063
23064 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23065 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
23066 %}
23067 ins_pipe( pipe_slow );
23068 %}
23069
23070 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
23071 predicate(Matcher::vector_length(n->in(1)) == 2); // src
23072 match(Set dst (ExtractD src idx));
23073 format %{ "extractD $dst,$src,$idx\t!" %}
23074 ins_encode %{
23075 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23076
23077 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23078 %}
23079 ins_pipe( pipe_slow );
23080 %}
23081
23082 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
23083 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
23084 Matcher::vector_length(n->in(1)) == 8); // src
23085 match(Set dst (ExtractD src idx));
23086 effect(TEMP vtmp);
23087 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
23088 ins_encode %{
23089 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
23090
23091 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
23092 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
23093 %}
23094 ins_pipe( pipe_slow );
23095 %}
23096
23097 // --------------------------------- Vector Blend --------------------------------------
23098
23099 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
23100 predicate(UseAVX == 0);
23101 match(Set dst (VectorBlend (Binary dst src) mask));
23102 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
23103 effect(TEMP tmp);
23104 ins_encode %{
23105 assert(UseSSE >= 4, "required");
23106
23107 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
23108 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
23109 }
23110 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
23111 %}
23112 ins_pipe( pipe_slow );
23113 %}
23114
23115 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
23116 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23117 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
23118 Matcher::vector_length_in_bytes(n) <= 32 &&
23119 is_integral_type(Matcher::vector_element_basic_type(n)));
23120 match(Set dst (VectorBlend (Binary src1 src2) mask));
23121 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23122 ins_encode %{
23123 int vlen_enc = vector_length_encoding(this);
23124 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23125 %}
23126 ins_pipe( pipe_slow );
23127 %}
23128
23129 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
23130 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
23131 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
23132 Matcher::vector_length_in_bytes(n) <= 32 &&
23133 !is_integral_type(Matcher::vector_element_basic_type(n)));
23134 match(Set dst (VectorBlend (Binary src1 src2) mask));
23135 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
23136 ins_encode %{
23137 int vlen_enc = vector_length_encoding(this);
23138 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23139 %}
23140 ins_pipe( pipe_slow );
23141 %}
23142
23143 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
23144 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
23145 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
23146 Matcher::vector_length_in_bytes(n) <= 32);
23147 match(Set dst (VectorBlend (Binary src1 src2) mask));
23148 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
23149 effect(TEMP vtmp, TEMP dst);
23150 ins_encode %{
23151 int vlen_enc = vector_length_encoding(this);
23152 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
23153 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23154 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23155 %}
23156 ins_pipe( pipe_slow );
23157 %}
23158
23159 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
23160 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
23161 n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
23162 match(Set dst (VectorBlend (Binary src1 src2) mask));
23163 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23164 effect(TEMP ktmp);
23165 ins_encode %{
23166 int vlen_enc = Assembler::AVX_512bit;
23167 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23168 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
23169 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23170 %}
23171 ins_pipe( pipe_slow );
23172 %}
23173
23174
23175 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
23176 predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
23177 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
23178 VM_Version::supports_avx512bw()));
23179 match(Set dst (VectorBlend (Binary src1 src2) mask));
23180 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
23181 ins_encode %{
23182 int vlen_enc = vector_length_encoding(this);
23183 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23184 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
23185 %}
23186 ins_pipe( pipe_slow );
23187 %}
23188
23189 // --------------------------------- ABS --------------------------------------
23190 // a = |a|
23191 instruct vabsB_reg(vec dst, vec src) %{
23192 match(Set dst (AbsVB src));
23193 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
23194 ins_encode %{
23195 uint vlen = Matcher::vector_length(this);
23196 if (vlen <= 16) {
23197 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23198 } else {
23199 int vlen_enc = vector_length_encoding(this);
23200 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23201 }
23202 %}
23203 ins_pipe( pipe_slow );
23204 %}
23205
23206 instruct vabsS_reg(vec dst, vec src) %{
23207 match(Set dst (AbsVS src));
23208 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
23209 ins_encode %{
23210 uint vlen = Matcher::vector_length(this);
23211 if (vlen <= 8) {
23212 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23213 } else {
23214 int vlen_enc = vector_length_encoding(this);
23215 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23216 }
23217 %}
23218 ins_pipe( pipe_slow );
23219 %}
23220
23221 instruct vabsI_reg(vec dst, vec src) %{
23222 match(Set dst (AbsVI src));
23223 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
23224 ins_encode %{
23225 uint vlen = Matcher::vector_length(this);
23226 if (vlen <= 4) {
23227 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23228 } else {
23229 int vlen_enc = vector_length_encoding(this);
23230 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23231 }
23232 %}
23233 ins_pipe( pipe_slow );
23234 %}
23235
23236 instruct vabsL_reg(vec dst, vec src) %{
23237 match(Set dst (AbsVL src));
23238 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
23239 ins_encode %{
23240 assert(UseAVX > 2, "required");
23241 int vlen_enc = vector_length_encoding(this);
23242 if (!VM_Version::supports_avx512vl()) {
23243 vlen_enc = Assembler::AVX_512bit;
23244 }
23245 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23246 %}
23247 ins_pipe( pipe_slow );
23248 %}
23249
23250 // --------------------------------- ABSNEG --------------------------------------
23251
23252 instruct vabsnegF(vec dst, vec src) %{
23253 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
23254 match(Set dst (AbsVF src));
23255 match(Set dst (NegVF src));
23256 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
23257 ins_cost(150);
23258 ins_encode %{
23259 int opcode = this->ideal_Opcode();
23260 int vlen = Matcher::vector_length(this);
23261 if (vlen == 2) {
23262 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23263 } else {
23264 assert(vlen == 8 || vlen == 16, "required");
23265 int vlen_enc = vector_length_encoding(this);
23266 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23267 }
23268 %}
23269 ins_pipe( pipe_slow );
23270 %}
23271
23272 instruct vabsneg4F(vec dst) %{
23273 predicate(Matcher::vector_length(n) == 4);
23274 match(Set dst (AbsVF dst));
23275 match(Set dst (NegVF dst));
23276 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
23277 ins_cost(150);
23278 ins_encode %{
23279 int opcode = this->ideal_Opcode();
23280 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
23281 %}
23282 ins_pipe( pipe_slow );
23283 %}
23284
23285 instruct vabsnegD(vec dst, vec src) %{
23286 match(Set dst (AbsVD src));
23287 match(Set dst (NegVD src));
23288 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
23289 ins_encode %{
23290 int opcode = this->ideal_Opcode();
23291 uint vlen = Matcher::vector_length(this);
23292 if (vlen == 2) {
23293 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
23294 } else {
23295 int vlen_enc = vector_length_encoding(this);
23296 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23297 }
23298 %}
23299 ins_pipe( pipe_slow );
23300 %}
23301
23302 //------------------------------------- VectorTest --------------------------------------------
23303
23304 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
23305 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
23306 match(Set cr (VectorTest src1 src2));
23307 effect(TEMP vtmp);
23308 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23309 ins_encode %{
23310 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23311 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23312 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23313 %}
23314 ins_pipe( pipe_slow );
23315 %}
23316
23317 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23318 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23319 match(Set cr (VectorTest src1 src2));
23320 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23321 ins_encode %{
23322 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23323 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23324 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23325 %}
23326 ins_pipe( pipe_slow );
23327 %}
23328
23329 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23330 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23331 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23332 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23333 match(Set cr (VectorTest src1 src2));
23334 effect(TEMP tmp);
23335 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23336 ins_encode %{
23337 uint masklen = Matcher::vector_length(this, $src1);
23338 __ kmovwl($tmp$$Register, $src1$$KRegister);
23339 __ andl($tmp$$Register, (1 << masklen) - 1);
23340 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23341 %}
23342 ins_pipe( pipe_slow );
23343 %}
23344
23345 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23346 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23347 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23348 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23349 match(Set cr (VectorTest src1 src2));
23350 effect(TEMP tmp);
23351 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23352 ins_encode %{
23353 uint masklen = Matcher::vector_length(this, $src1);
23354 __ kmovwl($tmp$$Register, $src1$$KRegister);
23355 __ andl($tmp$$Register, (1 << masklen) - 1);
23356 %}
23357 ins_pipe( pipe_slow );
23358 %}
23359
23360 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23361 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23362 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23363 match(Set cr (VectorTest src1 src2));
23364 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23365 ins_encode %{
23366 uint masklen = Matcher::vector_length(this, $src1);
23367 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23368 %}
23369 ins_pipe( pipe_slow );
23370 %}
23371
23372 //------------------------------------- LoadMask --------------------------------------------
23373
23374 instruct loadMask(legVec dst, legVec src) %{
23375 predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23376 match(Set dst (VectorLoadMask src));
23377 effect(TEMP dst);
23378 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23379 ins_encode %{
23380 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23381 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23382 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23383 %}
23384 ins_pipe( pipe_slow );
23385 %}
23386
23387 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23388 predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23389 match(Set dst (VectorLoadMask src));
23390 effect(TEMP xtmp);
23391 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23392 ins_encode %{
23393 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23394 true, Assembler::AVX_512bit);
23395 %}
23396 ins_pipe( pipe_slow );
23397 %}
23398
23399 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23400 predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23401 match(Set dst (VectorLoadMask src));
23402 effect(TEMP xtmp);
23403 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23404 ins_encode %{
23405 int vlen_enc = vector_length_encoding(in(1));
23406 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23407 false, vlen_enc);
23408 %}
23409 ins_pipe( pipe_slow );
23410 %}
23411
23412 //------------------------------------- StoreMask --------------------------------------------
23413
// VectorStoreMask for 1-byte elements when the mask lives in a vector register
// (input is not a pvectmask) and the result has fewer than 64 lanes.
// Emits a single per-byte absolute value from src into dst.
// NOTE(review): presumably mask lanes are 0 or -1, so the absolute value
// yields 0 or 1 -- confirm against the VectorStoreMask contract.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    // The legacy SSE form is used only for up to 16 lanes with UseAVX <= 2;
    // every other configuration takes the VEX/EVEX form.
    const bool use_avx_form = Matcher::vector_length(this) > 16 || UseAVX > 2;
    if (use_avx_form) {
      assert(UseAVX > 0, "required");
      // Encoding follows the source operand's vector length.
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this, $src));
    } else {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
23431
// VectorStoreMask for 2-byte elements when the mask lives in a vector register
// (input is not a pvectmask) and the result has at most 16 lanes.
// Narrows each 16-bit lane of src to one byte in dst; xtmp is scratch and dst
// is written before src is fully consumed (hence TEMP_DEF).
// NOTE(review): presumably mask lanes are 0 or -1, so the result bytes are
// 0 or 1 -- confirm against the VectorStoreMask contract.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    // Only read by the AVX path below.
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      // Up to 8 lanes: legacy SSE sequence.
      assert(UseSSE >= 3, "required");
      // xtmp = 0; it supplies the second source of the pack.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      // dst = absolute value of each 16-bit lane of src.
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      // Pack words to bytes (unsigned saturation); low bytes come from dst.
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      // More than 8 lanes: AVX sequence.
      assert(UseAVX > 0, "required");
      // dst = upper 128 bits of src.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      // Pack words to bytes (signed saturation): low 128 bits of src first,
      // then the extracted upper half.
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      // Per-byte absolute value of the packed result.
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23454
// VectorStoreMask for 4-byte elements with UseAVX <= 2, at most 8 lanes, and
// a mask held in a vector register (input is not a pvectmask).
// Narrows each 32-bit lane of src to one byte in dst in two pack steps;
// xtmp is a zeroed scratch register.
// NOTE(review): presumably mask lanes are 0 or -1, so the result bytes are
// 0 or 1 -- confirm against the VectorStoreMask contract.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    // Only read by the AVX path below.
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      // Up to 4 lanes: legacy SSE sequence.
      assert(UseSSE >= 3, "required");
      // xtmp = 0; it supplies the second source of both packs.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      // dst = absolute value of each 32-bit lane of src.
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      // dwords -> words, then words -> bytes (both with unsigned saturation).
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      // More than 4 lanes: AVX sequence.
      assert(UseAVX > 0, "required");
      // xtmp = 0; second source of the final pack.
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      // dst = upper 128 bits of src.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      // dwords -> words (signed saturation): low half of src, then upper half.
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      // words -> bytes (signed saturation), padded from the zeroed xtmp.
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      // Per-byte absolute value of the packed result.
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23480
// VectorStoreMask for 8-byte elements, exactly 2 lanes, with UseAVX <= 2.
// Legacy SSE sequence that narrows each 64-bit lane of src to one byte in dst;
// xtmp is a zeroed scratch register.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    // xtmp = 0; it supplies the second source of both packs.
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    // Selector 0x8 places src dwords 0 and 2 (the low dword of each 64-bit
    // lane) into the two lowest dword positions of dst.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    // Absolute value of each 32-bit lane.
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    // dwords -> words, then words -> bytes (both with unsigned saturation).
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23496
// VectorStoreMask for 8-byte elements, exactly 4 lanes, with UseAVX <= 2.
// AVX sequence that gathers the low dword of every 64-bit lane of the 256-bit
// source into the low 128 bits, then narrows dwords to bytes; vtmp is scratch.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // Within each 128-bit half, select dwords 0 and 2 (selector 0x88), i.e.
    // the low dword of each 64-bit lane. This step runs at 256 bits.
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    // vtmp = upper 128 bits of the shuffled value.
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // Blend mask 0xC: dwords 2 and 3 come from vtmp, dwords 0 and 1 stay.
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    // vtmp = 0; second source of both packs.
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    // dwords -> words, then words -> bytes (both with signed saturation).
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    // Per-byte absolute value of the packed result.
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23514
// VectorStoreMask for 4-byte elements with UseAVX > 2 when the mask lives in
// a vector register (input is not a pvectmask).
// Narrows dwords to bytes with evpmovdb, then takes the per-byte absolute value.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    // Without AVX512VL the narrowing move is emitted with the 512-bit encoding.
    int narrow_enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      narrow_enc = natural_src_enc;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, narrow_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
23530
// VectorStoreMask for 8-byte elements with UseAVX > 2 when the mask lives in
// a vector register (input is not a pvectmask).
// Narrows qwords to bytes with evpmovqb, then takes the per-byte absolute value.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    // Without AVX512VL the narrowing move is emitted with the 512-bit encoding.
    int narrow_enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      narrow_enc = natural_src_enc;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, narrow_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
23546
// VectorStoreMask from an opmask (kReg) source on targets where
// VM_Version::supports_avx512vlbw() is false. Only a 64-byte mask vector is
// expected (asserted below); everything is emitted with the 512-bit encoding.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    // Masked dword load of the vector_int_mask_cmp_bits() constant under the
    // opmask. NOTE(review): the 'false' argument is presumably the merge flag,
    // i.e. unselected lanes are zeroed -- confirm against evmovdqul.
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    // Narrow each dword lane to a byte.
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23560
// VectorStoreMask from an opmask (kReg) source on targets where
// VM_Version::supports_avx512vlbw() is true: expands the opmask into a byte
// vector and takes the per-byte absolute value.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    // Opmask bits -> byte lanes.
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, result_enc);
    // Per-byte absolute value of the expanded mask.
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
23573
// VectorMaskCast on an opmask (kReg) operand. Input and output share the same
// register (dst appears on both sides of the match), the rule has zero cost,
// and no machine code is emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // Intentionally empty: the cast is a no-op at the machine level.
  %}
  ins_pipe(empty);
%}
23583
// VectorMaskCast on a vector-register mask when the result and the input have
// the same size in bytes. Input and output share the same register, the rule
// has zero cost, and no machine code is emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // Intentionally empty: same-size cast needs no instructions.
  %}
  ins_pipe(empty);
%}
23594
// VectorMaskCast on a vector-register mask when the result and the input
// differ in size in bytes. Delegates to the vector_mask_cast macro-assembler
// helper with the destination/source element types and the lane count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_count    = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
23607
23608 //-------------------------------- Load Iota Indices ----------------------------------
23609
// VectorLoadConst with a zero immediate: loads the iota index vector for this
// node's element type and size via the load_iota_indices helper.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    __ load_iota_indices($dst$$XMMRegister,
                         Matcher::vector_length_in_bytes(this),
                         Matcher::vector_element_basic_type(this));
  %}
  ins_pipe( pipe_slow );
%}
23620
// PopulateIndex with an int start value (src1) and a stride that must be 1:
// dst = broadcast(src1) + iota indices. vtmp holds the broadcast value.
// NOTE(review): the iota vector is presumably 0, 1, 2, ... -- confirm against
// load_iota_indices.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    // vtmp = start value replicated into every lane.
    __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
    // dst = iota indices.
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, bt);
    // dst = iota + start.
    __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23636
// PopulateIndex with a long start value (src1) and a stride that must be 1:
// dst = broadcast(src1) + iota indices. vtmp holds the broadcast value.
// NOTE(review): the iota vector is presumably 0, 1, 2, ... -- confirm against
// load_iota_indices.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    // vtmp = start value replicated into every lane.
    __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
    // dst = iota indices.
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, bt);
    // dst = iota + start.
    __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23652
23653 //-------------------------------- Rearrange ----------------------------------
23654
23655 // LoadShuffle/Rearrange for Byte
// VectorRearrange for byte vectors with fewer than 32 lanes: a single in-place
// pshufb, so the data operand and the result share dst.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    // Byte shuffle of dst by the indices in shuffle, written back to dst.
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23667
// VectorRearrange for 32-lane byte vectors on targets without AVX512_VBMI.
// vpshufb only shuffles within each 128-bit lane, so the result is assembled
// from two in-lane shuffles (original and half-swapped source) blended by a
// mask derived from the shuffle indices. vtmp1 and vtmp2 are scratch.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc = Assembler::AVX_256bit;
    // vtmp1 = src with its two 128-bit halves exchanged.
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // vtmp1 = candidates for entries that come from the other 128-bit lane.
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, enc);
    // dst = candidates for entries that come from the same 128-bit lane.
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, enc);
    // vtmp2 = blend mask: high bit set for entries sourced from the other lane.
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), enc, noreg);
    // Pick per byte between the same-lane and other-lane candidates.
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23689
23690
// VectorRearrange for byte vectors with more than 32 lanes on targets without
// AVX512_VBMI. Delegates to the rearrange_bytes macro-assembler helper, which
// needs three vector temporaries, one opmask temporary and one GPR temporary.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // The comment marker now follows the tab, matching the other rules in this
  // file; it was previously glued to the last operand.
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23705
// VectorRearrange for byte vectors with at least 32 lanes on targets with
// AVX512_VBMI: a single vpermb.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23717
23718 // LoadShuffle/Rearrange for Short
23719
// VectorLoadShuffle for short vectors on targets without AVX512BW.
// Converts a shuffle of 16-bit element indices into a byte-index shuffle so
// that a byte shuffle instruction can perform the rearrange. Each element
// index i becomes the byte pair (2*i, 2*i) plus the constant loaded from
// vector_short_shufflemask(). vtmp is scratch.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      // Legacy SSE path: two-operand instructions, at most 128 bits.
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      // (the copy shifted left by 8 fills the high byte of each word)
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      // (vtmp is reused to hold the constant from vector_short_shufflemask())
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      // AVX path: same computation with three-operand forms; vectors wider
      // than 128 bits need UseAVX > 1.
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23760
// VectorRearrange for short vectors with at most 8 lanes on targets without
// AVX512BW: a single in-place pshufb, so the data operand and the result
// share dst.
// NOTE(review): the shuffle operand is presumably the byte-index form built
// by loadShuffleS, which matches under the same AVX512BW condition -- confirm.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    // Byte shuffle of dst by the indices in shuffle, written back to dst.
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23772
// VectorRearrange for 16-lane short vectors on targets without AVX512BW.
// vpshufb only shuffles within each 128-bit lane, so the result is assembled
// from two in-lane shuffles (original and half-swapped source) blended by a
// mask derived from the shuffle indices. vtmp1 and vtmp2 are scratch.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc = Assembler::AVX_256bit;
    // vtmp1 = src with its two 128-bit halves exchanged.
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // vtmp1 = candidates for entries that come from the other 128-bit lane.
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, enc);
    // dst = candidates for entries that come from the same 128-bit lane.
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, enc);
    // vtmp2 = blend mask: high bit set for entries sourced from the other lane.
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), enc, noreg);
    // Pick per byte between the same-lane and other-lane candidates.
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23794
// VectorRearrange for short vectors on targets with AVX512BW: a single vpermw.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    // Without AVX512VL the permute is emitted with the 512-bit encoding.
    int perm_enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      perm_enc = natural_enc;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
  %}
  ins_pipe( pipe_slow );
%}
23809
23810 // LoadShuffle/Rearrange for Integer and Float
23811
// VectorLoadShuffle for 4-lane int/float vectors with UseAVX == 0.
// Converts a shuffle of 32-bit element indices into a byte-index shuffle so
// that a byte shuffle instruction can perform the rearrange. Each element
// index i becomes four copies of 4*i plus the constant loaded from
// vector_int_shufflemask(). vtmp is scratch.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    // (selector 0xA0 copies the low word of each dword into both of its
    // words; shifting words left by 2 multiplies by 4)
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    // (dst is only a staging register here; the merged value ends up in vtmp)
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    // (dst is reloaded with the constant, then vtmp is added into it)
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23841
// VectorRearrange for int/float vectors with UseAVX == 0: a single in-place
// pshufb, so the data operand and the result share dst.
// NOTE(review): the shuffle operand is presumably the byte-index form built
// by loadShuffleI, which matches under the same UseAVX == 0 condition --
// confirm.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    // Byte shuffle of dst by the indices in shuffle, written back to dst.
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23853
// VectorRearrange for int/float vectors with UseAVX > 0. Delegates to the
// vector_rearrange_int_float macro-assembler helper.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23866
23867 // LoadShuffle/Rearrange for Long and Double
23868
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes on targets
// without AVX512VL. Converts a shuffle of 64-bit element indices into a
// double-word index shuffle: each element index i becomes the dword pair
// (2*i, 2*i) plus the constant loaded from vector_long_shufflemask().
// vtmp is scratch.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    // (the copy shifted left by 32 fills the high dword of each qword)
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23894
// VectorRearrange for long/double vectors with fewer than 8 lanes on targets
// without AVX512VL: a single double-word permute (vpermd). This rule shares
// its predicate with loadShuffleL, which builds a double-word index shuffle.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23908
// VectorRearrange for long/double vectors when the vector has 8 lanes or the
// target supports AVX512VL: a single quad-word permute (vpermq).
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int node_enc = vector_length_encoding(this);
    // A 128-bit node is promoted to the 256-bit encoding for the permute.
    const bool needs_widening = (node_enc == Assembler::AVX_128bit);
    int perm_enc = node_enc;
    if (needs_widening) {
      perm_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
  %}
  ins_pipe( pipe_slow );
%}
23925
23926 // --------------------------------- FMA --------------------------------------
23927 // a * b + c
23928
// Packed-float fused multiply-add, all-register form: c = a * b + c.
// Operand c is both the addend and the destination.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23940
// Packed-float fused multiply-add with the second multiplicand loaded from
// memory: c = a * b + c. Only matched when the first input vector is larger
// than 8 bytes. Operand c is both the addend and the destination.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23953
// Packed-double fused multiply-add, all-register form: c = a * b + c.
// Operand c is both the addend and the destination.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23965
// Packed-double fused multiply-add with the second multiplicand loaded from
// memory: c = a * b + c. Only matched when the first input vector is larger
// than 8 bytes. Operand c is both the addend and the destination.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
23978
23979 // --------------------------------- Vector Multiply Add --------------------------------------
23980
// MulAddVS2VI (packed short multiply-add producing ints) with UseAVX == 0:
// two-operand pmaddwd, so the first input and the result share dst.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    // dst = pmaddwd(dst, src1), computed in place.
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23990
// MulAddVS2VI (packed short multiply-add producing ints) with UseAVX > 0:
// three-operand vpmaddwd with a separate destination.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24001
24002 // --------------------------------- Vector Multiply Add Add ----------------------------------
24003
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into a single evpdpwssd on targets
// with AVX512_VNNI. dst is both the accumulator input and the result; the low
// ins_cost makes this rule preferable to the unfused pair.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                 vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
24016
24017 // --------------------------------- PopCount --------------------------------------
24018
// Unmasked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. Delegates to vector_popcount_integral_evex
// with k0 as the opmask argument. Element type and vector length encoding are
// taken from the source operand.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The previously computed ideal opcode was never read and has been dropped.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24032
// Masked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. dst is first seeded with a copy of src, then
// the popcount is applied under the opmask.
// NOTE(review): the copy presumably lets lanes not selected by the mask keep
// the source value -- confirm against vector_popcount_integral_evex.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int src_enc = vector_length_encoding(this, $src);
    // dst = src (full-register copy).
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    // Popcount of src into dst under the opmask.
    __ vector_popcount_integral_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
24046
// PopCountVI / PopCountVL for element types rejected by
// is_vector_popcount_predicate. Delegates to the vector_popcount_integral
// helper, which needs two vector temporaries and one GPR temporary.
// Element type and vector length encoding are taken from the source operand.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The previously computed ideal opcode was never read and has been dropped.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24062
24063 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24064
// CountTrailingZerosV for inputs accepted by is_clz_non_subword_predicate_evex
// (checked on the input's element type and size in bytes). Delegates to
// vector_count_trailing_zeros_evex; the three vector temporaries this variant
// does not need are passed as xnoreg and the opmask argument is k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker now follows the tab, matching the other rules in this
  // file; it was previously glued to the last operand.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24080
// CountTrailingZerosV for short inputs on targets with AVX512CD, when either
// AVX512VL is available or the vector is 64 bytes. Delegates to
// vector_count_trailing_zeros_evex with three vector temporaries; the unused
// vector temporary slot is passed as xnoreg and the opmask argument is k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  // The comment marker now follows the tab, matching the other rules in this
  // file; it was previously glued to the last operand.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24097
// CountTrailingZerosV for byte inputs on targets where
// VM_Version::supports_avx512vlbw() is true. Delegates to
// vector_count_trailing_zeros_evex with four vector temporaries, an opmask
// temporary and a GPR temporary.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker now follows the tab, matching the other rules in this
  // file; it was previously glued to the last operand.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24113
// CountTrailingZerosV on targets without AVX512VL for inputs smaller than
// 64 bytes. Delegates to vector_count_trailing_zeros_avx, which needs three
// vector temporaries and one GPR temporary.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and encoding both come from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int src_enc = vector_length_encoding(this, $src);
    __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
24127
24128
24129 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24130
// Three-input bitwise logic (MacroLogicV) on vector registers.
// dst is the first input as well as the result; func is the 8-bit immediate
// handed to vpternlogd.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    // Length encoding comes from this node, i.e. the result vector.
    int vlen_enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24141
// Three-input bitwise logic (MacroLogicV) whose third input is loaded directly
// from memory. Only matched when the vector reached through n->in(1)->in(1)
// is wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    // Length encoding comes from this node, i.e. the result vector.
    int vlen_enc = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24153
24154 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (left or right) by an 8-bit immediate count.
// The ideal opcode tells vprotate_imm which direction is requested.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    int rot_opc = this->ideal_Opcode();
    __ vprotate_imm(rot_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24167
// Vector rotate (left or right) with the rotate count supplied in a vector
// register. The ideal opcode tells vprotate_var which direction is requested.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    int rot_opc = this->ideal_Opcode();
    __ vprotate_var(rot_opc, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24180
24181 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load whose mask lives in a vector register; chosen when the
// mask input (n->in(3)) is not typed as a predicate-register mask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    // Length encoding and element type describe the loaded vector.
    int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24193
24194
// Masked vector load whose mask lives in an opmask (k) register; chosen when
// the mask input (n->in(3)) is typed as a predicate-register mask.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask() != nullptr);
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    // Length encoding and element type describe the loaded vector.
    int vec_enc = vector_length_encoding(this);
    BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
    // NOTE(review): the boolean is presumably the merge flag of evmovdqu - confirm.
    __ evmovdqu(elem_bt, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24206
// Masked vector store whose mask lives in a vector register; chosen when the
// mask input (n->in(3)->in(2)) is not typed as a predicate-register mask.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    // Element type and length encoding are taken from the node that feeds the
    // src operand rather than from this node.
    const MachNode* stored_vec = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elem_bt = stored_vec->bottom_type()->is_vect()->element_basic_type();
    int vec_enc = vector_length_encoding(stored_vec);
    __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24219
// Masked vector store whose mask lives in an opmask (k) register; chosen when
// the mask input (n->in(3)->in(2)) is typed as a predicate-register mask.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask() != nullptr);
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    // Element type and length encoding are taken from the node that feeds the
    // src operand rather than from this node.
    const MachNode* stored_vec = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vec_enc = vector_length_encoding(stored_vec);
    BasicType elem_bt = stored_vec->bottom_type()->is_vect()->element_basic_type();
    // NOTE(review): the boolean is presumably the merge flag of evmovdqu - confirm.
    __ evmovdqu(elem_bt, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24232
// Alignment check for a vector memory address: when any address bit selected
// by mask is set, a stop() with a diagnostic message is emitted on that path.
// addr is also the result of the node and is not written by the encoding.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label L_aligned;
    // The test sets ZF when all masked address bits are zero, i.e. aligned.
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, L_aligned);
    // Reached only for a misaligned address.
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(L_aligned);
  %}
  ins_pipe( pipe_slow );
%}
24247
// Masked vector equality comparison yielding an int in dst.
// dst is preset to -1 and kept when kortest of the inverted mask and the
// equality result sets the carry flag; otherwise dst becomes the
// trailing-zero count of the inverted equality result.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    // Both vector inputs must agree in length encoding and element type.
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vec_enc = vector_length_encoding(this, $src1);
    Label L_done;

    // ktmp2 receives the complement of the mask.
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Default result, kept when the branch below is taken.
    __ mov64($dst$$Register, -1L);
    // ktmp1 receives the equality result computed under the mask.
    __ evpcmp(bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vec_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, L_done);
    // Otherwise count the trailing zeros of the inverted equality result.
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(L_done);
  %}
  ins_pipe(pipe_slow);
%}
24272
24273
// Builds an opmask register from a length held in a general register by
// delegating to genmask; one long register is reserved as a temporary.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister,
               $len$$Register,
               $temp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
24283
// Builds an opmask register from a compile-time constant length.
// A non-positive length clears dst; a positive length materializes
// right_n_bits(len) in temp and moves it into dst.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp);
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  ins_encode %{
    if ($len$$constant <= 0) {
      // Xor of dst with itself leaves the mask all zero.
      __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
    } else {
      __ mov64($temp$$Register, right_n_bits($len$$constant));
      __ kmovql($dst$$KRegister, $temp$$Register);
    }
  %}
  ins_pipe(pipe_slow);
%}
24298
// VectorMaskToLong for a mask held in an opmask (k) register.
// dst is handed to vector_mask_operation in both general-register positions.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() != nullptr);
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24315
// VectorMaskToLong for a mask held in a vector register (the input is not
// typed as a predicate-register mask). One vector temporary is reserved.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24331
// VectorMaskToLong applied to a VectorStoreMask; the rule matches both nodes
// and operates directly on the vector mask feeding the VectorStoreMask.
// Chosen when that inner input is not typed as a predicate-register mask.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24347
// VectorMaskTrueCount for a mask held in an opmask (k) register.
// One long register is reserved as a temporary.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() != nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24364
// VectorMaskTrueCount for a mask held in a vector register (the input is not
// typed as a predicate-register mask). Reserves one long register and one
// vector register as temporaries.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24380
// VectorMaskTrueCount applied to a VectorStoreMask; the rule matches both
// nodes and operates directly on the vector mask feeding the VectorStoreMask.
// Chosen when that inner input is not typed as a predicate-register mask.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24396
// VectorMaskFirstTrue and VectorMaskLastTrue for a mask held in an opmask (k)
// register. The ideal opcode tells vector_mask_operation which of the two
// operations is requested.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() != nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element size in bytes.
    int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24414
// VectorMaskFirstTrue and VectorMaskLastTrue for a mask held in a vector
// register (the input is not typed as a predicate-register mask).
// The ideal opcode tells vector_mask_operation which operation is requested.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24431
// VectorMaskFirstTrue and VectorMaskLastTrue applied to a VectorStoreMask;
// the rule matches both nodes and operates directly on the vector mask that
// feeds the VectorStoreMask. Chosen when that inner input is not typed as a
// predicate-register mask.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(!n->in(1)->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // All mask properties are read from the mask operand.
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24448
24449 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV and ExpandV with a vector-register mask, used when AVX512VL is not
// supported and the result vector is at most 32 bytes long. The ideal opcode
// tells vector_compress_expand_avx2 which of the two operations is requested.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    __ vector_compress_expand_avx2(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24465
// CompressV and ExpandV with an opmask (k) register mask, used when AVX512VL
// is supported or the result vector is exactly 64 bytes long. The ideal opcode
// tells vector_compress_expand which of the two operations is requested.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    int ideal_opc = this->ideal_Opcode();
    // NOTE(review): the boolean is presumably the merge flag - confirm against vector_compress_expand.
    __ vector_compress_expand(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24479
// CompressM on an opmask (k) register, delegated to vector_mask_compress.
// Two long registers are reserved as temporaries.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    // The input is expected to be typed as a predicate-register mask.
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    // Lane count of the result mask.
    int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
24491
24492 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24493
// ReverseV (bit reversal within each element) used when GFNI is not supported.
// Reserves two vector temporaries and one int register temporary.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24507
// ReverseV (bit reversal within each element) used when GFNI is supported.
// A 64-bit constant is placed in the constant section and its address is
// handed to vector_reverse_bit_gfni; one vector temporary is reserved.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // NOTE(review): the constant is presumably the GF2P8AFFINE bit-reversal matrix - confirm.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24522
// ReverseBytesV without temporaries, used when AVX512BW is supported or the
// result vector is shorter than 64 bytes.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
24535
// ReverseBytesV for 64-byte vectors when AVX512BW is not supported.
// Reserves two vector temporaries and one int register temporary.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    // Element type and length encoding describe the result vector.
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24549
24550 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24551
// Unmasked CountLeadingZerosV for the element type and vector size accepted by
// is_clz_non_subword_predicate_evex. No temporaries are reserved; the helper
// receives xnoreg, k0 and noreg in the temporary and mask positions.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    // Both properties are read from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24565
// Masked CountLeadingZerosV for the element type and vector size accepted by
// is_clz_non_subword_predicate_evex. dst is first loaded with src, then the
// helper runs under the opmask register.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    // Both properties are read from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    // Copy src into dst before the masked operation.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24580
// CountLeadingZerosV for T_SHORT elements; requires AVX512CD and either
// AVX512VL or a 64-byte result vector. Reserves two vector temporaries.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Both properties are read from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // Unused temporary and mask positions receive xnoreg, k0 and noreg.
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24596
// CountLeadingZerosV for T_BYTE elements when
// VM_Version::supports_avx512vlbw() is true. Reserves three vector
// temporaries, one opmask temporary and one pointer register temporary.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Both properties are read from the source operand.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24611
// CountLeadingZerosV for T_INT elements when AVX512VL is not supported and the
// source vector is shorter than 64 bytes. Reserves three vector temporaries;
// no general register is needed, so noreg is passed to the helper.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    // Both properties are read from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24626
// CountLeadingZerosV for every element type other than T_INT when AVX512VL is
// not supported and the source vector is shorter than 64 bytes. Reserves three
// vector temporaries and one pointer register temporary.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Both properties are read from the source operand.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24641
24642 // ---------------------------------- Vector Masked Operations ------------------------------------
24643
// Masked vector add for all six Add vector opcodes with a register second
// operand. dst is the first input and the result; the ideal opcode and the
// element type tell evmasked_op which add to emit.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24661
// Masked vector add for all six Add vector opcodes with the second operand
// loaded from memory. dst is the first input and the result; the ideal opcode
// and the element type tell evmasked_op which add to emit.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24679
// Masked vector XOR with a register second operand. dst is the first input
// and the result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24692
// Masked vector XOR with the second operand loaded from memory. dst is the
// first input and the result.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24705
// Masked vector OR with a register second operand. dst is the first input
// and the result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24718
// Masked vector OR with the second operand loaded from memory. dst is the
// first input and the result.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24731
// Masked vector AND with a register second operand. dst is the first input
// and the result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24744
// Masked vector AND with the second operand loaded from memory. dst is the
// first input and the result.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24757
// Masked vector subtract for all six Sub vector opcodes with a register second
// operand. dst is the first input and the result; the ideal opcode and the
// element type tell evmasked_op which subtract to emit.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24775
// Masked vector subtract for all six Sub vector opcodes with the second
// operand loaded from memory. dst is the first input and the result; the ideal
// opcode and the element type tell evmasked_op which subtract to emit.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean is presumably the merge flag of evmasked_op - confirm.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24793
// Masked vector multiply, register/register form.
// Covers MulVS/MulVI/MulVL/MulVF/MulVD (no byte variant is matched here).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24810
// Masked vector multiply with the second operand loaded from memory.
// Covers MulVS/MulVI/MulVL/MulVF/MulVD where the second input is a LoadVector.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask)
%{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24827
// Masked vector square root (SqrtVF/SqrtVD), operating in place on dst.
instruct vsqrt_reg_masked(vec dst, kReg mask)
%{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Unary operation: dst is supplied for the destination and both sources.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24841
// Masked floating-point vector divide (DivVF/DivVD), register/register form.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the dividend.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24855
// Masked floating-point vector divide (DivVF/DivVD) with the divisor loaded
// from memory.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask)
%{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the dividend.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24869
24870
// Masked vector rotate by an 8-bit immediate count.
// Handles both RotateLeftV and RotateRightV; the direction is conveyed to
// evmasked_op through the ideal opcode.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask)
%{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24884
// Masked vector rotate with the rotate counts held in a vector register.
// Handles both RotateLeftV and RotateRightV; the direction is conveyed to
// evmasked_op through the ideal opcode.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24898
// Masked vector left shift by an immediate count (LShiftVS/LShiftVI/LShiftVL).
// The count reaches the matcher wrapped in an LShiftCntV node.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask)
%{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24913
// Masked vector left shift with the count in a vector register, selected
// only when the shift node is NOT a variable shift.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: false for a non-variable shift.
    const bool var_shift = false;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
24929
// Masked vector left shift with the count in a vector register, selected
// only when the shift node IS a variable shift.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: true for a variable shift.
    const bool var_shift = true;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
24945
// Masked vector right shift (RShiftVS/RShiftVI/RShiftVL) by an immediate
// count. The count reaches the matcher wrapped in an RShiftCntV node.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask)
%{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24960
// Masked vector right shift (RShiftVS/RShiftVI/RShiftVL) with the count in a
// vector register, selected only when the shift node is NOT a variable shift.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: false for a non-variable shift.
    const bool var_shift = false;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
24976
// Masked vector right shift (RShiftVS/RShiftVI/RShiftVL) with the count in a
// vector register, selected only when the shift node IS a variable shift.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: true for a variable shift.
    const bool var_shift = true;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
24992
// Masked vector unsigned right shift (URShiftVS/URShiftVI/URShiftVL) by an
// immediate count. The count reaches the matcher wrapped in an RShiftCntV node.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask)
%{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25007
// Masked vector unsigned right shift with the count in a vector register,
// selected only when the shift node is NOT a variable shift.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: false for a non-variable shift.
    const bool var_shift = false;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
25023
// Masked vector unsigned right shift with the count in a vector register,
// selected only when the shift node IS a variable shift.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask)
%{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The trailing flag follows the predicate: true for a variable shift.
    const bool var_shift = true;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, var_shift);
  %}
  ins_pipe(pipe_slow);
%}
25039
// Masked vector maximum (MaxV), register/register form.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25052
// Masked vector maximum (MaxV) with the second operand loaded from memory.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask)
%{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25065
// Masked vector minimum (MinV), register/register form.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25078
// Masked vector minimum (MinV) with the second operand loaded from memory.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask)
%{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // dst serves as both the destination and the first source operand.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25091
// Masked VectorRearrange, register/register form.
// Unlike the neighbouring masked arithmetic rules, this one passes `false`
// for the boolean argument that precedes the vector-length encoding.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask)
%{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25104
// Masked integral vector absolute value (AbsVB/AbsVS/AbsVI/AbsVL), operating
// in place on dst.
instruct vabs_masked(vec dst, kReg mask)
%{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // Unary operation: dst is supplied for the destination and both sources.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25120
// Masked fused multiply-add (FmaVF/FmaVD) with all vector operands in
// registers. dst is an input of the match rule as well as the result.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask)
%{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25135
// Masked fused multiply-add (FmaVF/FmaVD) with the third operand loaded from
// memory. dst is an input of the match rule as well as the result.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask)
%{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25150
// Masked vector compare that writes its result to an opmask register.
// The element type of src1 selects the compare instruction. Integral types
// derive both the predicate and the signedness from $cond; floating-point
// types use the FP predicate mapping.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask)
%{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const int vec_enc = vector_length_encoding(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch on the element type of the first source vector.
    switch (elem_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_LONG: {
        // Integral compares share the predicate/signedness derivation and
        // differ only in the element-width-specific instruction.
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        if (elem_bt == T_BYTE) {
          __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else if (elem_bt == T_SHORT) {
          __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else if (elem_bt == T_INT) {
          __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        } else {
          __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, is_signed, vec_enc);
        }
        break;
      }
      case T_FLOAT:
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        if (elem_bt == T_FLOAT) {
          __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        } else {
          __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        }
        break;
      }
      default:
        // Any other element type is unexpected here.
        assert(false, "%s", type2name(elem_bt));
        break;
    }
  %}
  ins_pipe(pipe_slow);
%}
25200
// MaskAll from an int register into an opmask register, for vector lengths
// of at most 32 lanes.
instruct mask_all_evexI_LE32(kReg dst, rRegI src)
%{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
25211
// Mask NOT for masks shorter than 8 lanes, expressed in the ideal graph as
// XorVMask of the source with MaskAll(cnt). Requires AVX512DQ and uses one
// opmask and one general-purpose temporary.
// NOTE(review): immI_M1 is presumably the constant -1 - confirm against the
// operand definition.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt)
%{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25223
// Mask NOT without temporaries, expressed in the ideal graph as XorVMask of
// the source with MaskAll(cnt). Applicable for:
//   - 8 lanes when AVX512DQ is available,
//   - exactly 16 lanes,
//   - more than 16 lanes when AVX512BW is available.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt)
%{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    __ knot(lane_count, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25236
// VectorLongToMask producing a vector-register mask (the node's type is not a
// predicate-register mask) for at most 8 lanes. No vector temporary is needed,
// so xnoreg is passed in its place.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2)
%{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    const int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              xnoreg, lane_count, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25250
25251
// VectorLongToMask producing a vector-register mask (the node's type is not a
// predicate-register mask) for more than 8 lanes. Needs an extra vector
// temporary and kills the flags register.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr)
%{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // This path supports at most 32 lanes.
    assert(lane_count <= 32, "invalid mask length");
    const int vec_enc = vector_length_encoding(lane_count);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register,
                              $xtmp1$$XMMRegister, lane_count, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25266
// VectorLongToMask when the result is a predicate-register mask: a single
// move from the general-purpose register into the opmask register.
instruct long_to_mask_evex(kReg dst, rRegL src)
%{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25276
// Logical AND/OR/XOR of two opmask registers (AndVMask/OrVMask/XorVMask).
// kscratch is reserved as a TEMP but is not referenced by the encoding below.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch)
%{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Sanity check: both inputs must carry the same mask type.
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");

    // Without AVX512DQ, mask lengths below 16 are handled as 16.
    uint lane_count = Matcher::vector_length(this);
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25293
// Masked three-input logic operation (MacroLogicV) with all vector operands
// in registers. $func is the 8-bit immediate passed through to evpternlog.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask)
%{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25305
// Masked three-input logic operation (MacroLogicV) with the third operand
// loaded from memory. $func is the 8-bit immediate passed through to
// evpternlog.
//
// The third input must be wrapped in LoadVector so that the `memory` operand
// binds to the address of a vector load, as in every other *_mem_masked rule
// in this file. Without the wrapper the rule had the same tree shape as
// vternlog_reg_masked and tried to match a memory operand against a vector
// value.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask)
%{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary (LoadVector src3) (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25317
// CastVV on an opmask register: emits no code (size 0, cost 0, empty
// encoding).
instruct castMM(kReg dst) %{
  match(Set dst (CastVV dst));
  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25328
// CastVV on a vector register: emits no code (size 0, cost 0, empty
// encoding).
instruct castVV(vec dst) %{
  match(Set dst (CastVV dst));
  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25339
// CastVV on a legVec register: emits no code (size 0, cost 0, empty
// encoding).
instruct castVVLeg(legVec dst) %{
  match(Set dst (CastVV dst));
  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25350
// IsInfiniteF: classify a scalar float with vfpclassss into an opmask
// temporary, then move the result into the integer destination.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf categories of
    // the vfpclass immediate, consistent with IsInfiniteF - confirm against
    // the instruction reference.
    const int fp_class_imm = 0x18;
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, fp_class_imm);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25362
// IsInfiniteD: classify a scalar double with vfpclasssd into an opmask
// temporary, then move the result into the integer destination.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) %{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    // NOTE(review): 0x18 presumably selects the +Inf and -Inf categories of
    // the vfpclass immediate, consistent with IsInfiniteD - confirm against
    // the instruction reference.
    const int fp_class_imm = 0x18;
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, fp_class_imm);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25374
// Signed saturating add/sub on subword element types, register/register form.
// The predicate selects non-unsigned saturating nodes; `false` is passed to
// vector_saturating_op to match.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25390
// Unsigned saturating add/sub on subword element types, register/register
// form. The predicate selects unsigned saturating nodes; `true` is passed to
// vector_saturating_op to match.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_saturating_op(this->ideal_Opcode(), bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25406
// Signed saturating add/sub on non-subword element types, EVEX flavour.
// Chosen for 64-byte vectors, or any length when AVX512VL is available.
// Uses two vector and two opmask temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), bt,
                                        $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister,
                                        vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25426
// Signed saturating add/sub on non-subword element types, AVX flavour.
// Chosen for vectors of at most 32 bytes when AVX512VL is not available.
// Uses four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), bt,
                                       $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                       vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25445
// Unsigned saturating add on non-subword element types, EVEX flavour.
// Chosen for 64-byte vectors, or any length when AVX512VL is available.
// Uses two vector temporaries and one opmask temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25462
// Unsigned saturating add on non-subword element types, AVX flavour.
// Chosen for vectors of at most 32 bytes when AVX512VL is not available.
// Uses three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(bt, $dst$$XMMRegister,
                                             $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             $xtmp3$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25479
// Unsigned saturating subtract on non-subword element types, EVEX flavour.
// Chosen for 64-byte vectors, or any length when AVX512VL is available.
// Only an opmask temporary is declared.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) %{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(bt, $dst$$XMMRegister,
                                              $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25496
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  // Unsigned saturating vector subtract for non-subword element types.
  // Selected only for vectors of at most 32 bytes when AVX512VL is not supported.
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  // dst and both scratch vectors are declared TEMP.
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    XMMRegister tmp1 = $xtmp1$$XMMRegister;
    XMMRegister tmp2 = $xtmp2$$XMMRegister;
    __ vector_sub_dq_saturating_unsigned_avx(bt, result, lhs, rhs, tmp1, tmp2, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25513
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  // Signed (not unsigned) saturating add/sub on subword element types where
  // the second input is a vector load folded in as a memory operand.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    // One rule covers both add and sub; the ideal opcode tells the helper which.
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The boolean argument is false in this signed rule.
    __ vector_saturating_op(ideal_op, bt,
                            $dst$$XMMRegister,
                            $src1$$XMMRegister,
                            $src2$$Address,
                            false,
                            vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25529
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  // Unsigned saturating add/sub on subword element types where the second
  // input is a vector load folded in as a memory operand.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    // One rule covers both add and sub; the ideal opcode tells the helper which.
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The boolean argument is true in this unsigned rule.
    __ vector_saturating_op(ideal_op, bt,
                            $dst$$XMMRegister,
                            $src1$$XMMRegister,
                            $src2$$Address,
                            true,
                            vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25545
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask)
%{
  // Masked, signed (not unsigned) saturating add/sub on subword element types.
  // dst is both the first input and the result.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // acc is passed twice: as destination and as first source.
    // The two flags are (false, true) here; the first presumably selects
    // unsigned arithmetic - confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(ideal_op, bt, $mask$$KRegister,
                              acc, acc, $src$$XMMRegister,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25560
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask)
%{
  // Masked, unsigned saturating add/sub on subword element types.
  // dst is both the first input and the result.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // acc is passed twice: as destination and as first source.
    // The two flags are (true, true) here; the first presumably selects
    // unsigned arithmetic - confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(ideal_op, bt, $mask$$KRegister,
                              acc, acc, $src$$XMMRegister,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25575
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask)
%{
  // Masked, signed (not unsigned) saturating add/sub on subword element types
  // with the second input loaded from memory. dst is also the first input.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // acc is passed twice: as destination and as first source.
    // The two flags are (false, true) here; the first presumably selects
    // unsigned arithmetic - confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(ideal_op, bt, $mask$$KRegister,
                              acc, acc, $src$$Address,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25590
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask)
%{
  // Masked, unsigned saturating add/sub on subword element types with the
  // second input loaded from memory. dst is also the first input.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    // acc is passed twice: as destination and as first source.
    // The two flags are (true, true) here; the first presumably selects
    // unsigned arithmetic - confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(ideal_op, bt, $mask$$KRegister,
                              acc, acc, $src$$Address,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25605
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  // SelectFromTwoVector: the index operand is both an input and the result.
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister idx_and_result = $index$$XMMRegister;
    __ select_from_two_vectors_evex(elem_type,
                                    idx_and_result,
                                    $src1$$XMMRegister,
                                    $src2$$XMMRegister,
                                    vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25617
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  // ReinterpretS2HF: move from a general-purpose register into an XMM
  // register using evmovw.
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister xmm_dst = $dst$$XMMRegister;
    Register gpr_src = $src$$Register;
    __ evmovw(xmm_dst, gpr_src);
  %}
  ins_pipe(pipe_slow);
%}
25627
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  // ReinterpretHF2S: move from an XMM register into a general-purpose
  // register using evmovw, then narrow the result as T_SHORT.
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register gpr_dst = $dst$$Register;
    __ evmovw(gpr_dst, $src$$XMMRegister);
    // Narrowing is done in place on the destination register.
    __ narrow_subword_type(gpr_dst, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
25638
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  // Fuses ConvF2HF followed by ReinterpretS2HF into one vcvtps2ph, with
  // source and destination both in XMM registers.
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    // Immediate operand handed to vcvtps2ph unchanged.
    const int cvt_imm = 0x04;
    __ vcvtps2ph($dst$$XMMRegister,
                 $src$$XMMRegister,
                 cvt_imm,
                 Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25648
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  // Fuses ReinterpretHF2S followed by ConvHF2F into one vcvtph2ps, with
  // source and destination both in XMM registers.
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ vcvtph2ps(to, from, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25658
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  // Scalar SqrtHF, emitted as vsqrtsh.
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ vsqrtsh(result, operand);
  %}
  ins_pipe(pipe_slow);
%}
25668
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  // One rule for the four scalar HF binary operations: add, div, mul and sub.
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // efp16sh receives the ideal opcode of the matched node.
    const int ideal_op = this->ideal_Opcode();
    __ efp16sh(ideal_op,
               $dst$$XMMRegister,
               $src1$$XMMRegister,
               $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25682
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  // Scalar MaxHF/MinHF, used only when AVX10.2 is supported.
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    // k0 is passed as the opmask argument (presumably meaning "no masking").
    __ sminmax_fp16_avx10_2(ideal_op, result, lhs, rhs, k0);
  %}
  ins_pipe( pipe_slow );
%}
25696
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  // Scalar MaxHF/MinHF, used when AVX10.2 is not supported. Needs an opmask
  // register and two XMM registers as temporaries.
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    KRegister kscratch = $ktmp$$KRegister;
    XMMRegister tmp1 = $xtmp1$$XMMRegister;
    XMMRegister tmp2 = $xtmp2$$XMMRegister;
    __ sminmax_fp16(ideal_op, result, lhs, rhs, kscratch, tmp1, tmp2);
  %}
  ins_pipe( pipe_slow );
%}
25712
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  // Scalar FmaHF. dst is one of the multiplicands and also receives the
  // result; per the format string, dst = dst * src1 + src2.
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister factor = $src1$$XMMRegister;
    // src2 is passed before src1 to vfmadd132sh.
    __ vfmadd132sh(acc, addend, factor);
  %}
  ins_pipe( pipe_slow );
%}
25723
25724
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  // Vector SqrtVHF on a register operand, emitted as evsqrtph.
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ evsqrtph(result, operand, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25735
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  // Vector SqrtVHF whose input is a reinterpreted vector load; the load is
  // folded into evsqrtph as a memory operand.
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ evsqrtph(result, $src$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25746
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  // One rule for the four vector HF binary operations: add, div, mul and sub.
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // evfp16ph receives the ideal opcode of the matched node.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(ideal_op,
                $dst$$XMMRegister,
                $src1$$XMMRegister,
                $src2$$XMMRegister,
                vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25761
25762
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  // Vector HF add, div, mul and sub where the second input is a
  // reinterpreted vector load, folded in as a memory operand.
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    // evfp16ph receives the ideal opcode of the matched node.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(ideal_op,
                $dst$$XMMRegister,
                $src1$$XMMRegister,
                $src2$$Address,
                vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25777
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  // Vector FmaVHF. dst is one of the multiplicands and also receives the
  // result; per the format string, dst = dst * src1 + src2.
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister factor = $src1$$XMMRegister;
    // src2 is passed before src1 to evfmadd132ph.
    __ evfmadd132ph(acc, addend, factor, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25788
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  // Vector FmaVHF where src1 is a reinterpreted vector load, folded in as a
  // memory operand; per the format string, dst = dst * src1 + src2.
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    // src2 is passed before the src1 memory operand to evfmadd132ph.
    __ evfmadd132ph(acc, addend, $src1$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25799
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  // Vector MinVHF/MaxVHF with the second input being a reinterpreted vector
  // load; used only when AVX10.2 is supported.
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    // k0 is passed as the opmask argument (presumably meaning "no masking").
    __ vminmax_fp16_avx10_2(ideal_op,
                            $dst$$XMMRegister,
                            $src1$$XMMRegister,
                            $src2$$Address,
                            k0,
                            vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25814
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  // Vector MinVHF/MaxVHF on register operands; used only when AVX10.2 is
  // supported.
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    // k0 is passed as the opmask argument (presumably meaning "no masking").
    __ vminmax_fp16_avx10_2(ideal_op,
                            $dst$$XMMRegister,
                            $src1$$XMMRegister,
                            $src2$$XMMRegister,
                            k0,
                            vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25829
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  // Vector MinVHF/MaxVHF, used when AVX10.2 is not supported. Needs an opmask
  // register and two vector registers as temporaries.
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // The ideal opcode tells the helper whether this is a min or a max.
    const int ideal_op = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister first_arg = $src2$$XMMRegister;
    XMMRegister second_arg = $src1$$XMMRegister;
    KRegister kscratch = $ktmp$$KRegister;
    XMMRegister tmp1 = $xtmp1$$XMMRegister;
    XMMRegister tmp2 = $xtmp2$$XMMRegister;
    // Note the operand order: src2 is handed to the helper before src1.
    __ vminmax_fp16(ideal_op, result, first_arg, second_arg, kscratch, tmp1, tmp2, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25845
25846 //----------PEEPHOLE RULES-----------------------------------------------------
25847 // These must follow all instruction definitions as they use the names
25848 // defined in the instructions definitions.
25849 //
25850 // peeppredicate ( rule_predicate );
//   // the peephole rule is ignored unless this predicate holds
25852 //
25853 // peepmatch ( root_instr_name [preceding_instruction]* );
25854 //
25855 // peepprocedure ( procedure_name );
//   // Names the procedure that performs the optimization. The procedure should
//   // reside in the architecture-dependent peephole file and have the
//   // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
//   // where the arguments are the basic block, the index of the current node
//   // inside the block, the register allocator, the function(s) that, when
//   // invoked, return a new node defined in peepreplace, and the rules of the
//   // nodes appearing in the corresponding peepmatch. The function returns
//   // true if successful, else false.
25864 //
25865 // peepconstraint %{
25866 // (instruction_number.operand_name relational_op instruction_number.operand_name
25867 // [, ...] );
25868 // // instruction numbers are zero-based using left to right order in peepmatch
25869 //
25870 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25871 // // provide an instruction_number.operand_name for each operand that appears
25872 // // in the replacement instruction's match rule
25873 //
25874 // ---------VM FLAGS---------------------------------------------------------
25875 //
25876 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25877 //
25878 // Each peephole rule is given an identifying number starting with zero and
25879 // increasing by one in the order seen by the parser. An individual peephole
25880 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25881 // on the command-line.
25882 //
25883 // ---------CURRENT LIMITATIONS----------------------------------------------
25884 //
25885 // Only transformations inside a basic block (do we need more for peephole)
25886 //
25887 // ---------EXAMPLE----------------------------------------------------------
25888 //
25889 // // pertinent parts of existing instructions in architecture description
25890 // instruct movI(rRegI dst, rRegI src)
25891 // %{
25892 // match(Set dst (CopyI src));
25893 // %}
25894 //
25895 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25896 // %{
25897 // match(Set dst (AddI dst src));
25898 // effect(KILL cr);
25899 // %}
25900 //
25901 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25902 // %{
25903 // match(Set dst (AddI dst src));
25904 // %}
25905 //
25906 // 1. Simple replacement
25907 // - Only match adjacent instructions in same basic block
25908 // - Only equality constraints
25909 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25910 // - Only one replacement instruction
25911 //
25912 // // Change (inc mov) to lea
25913 // peephole %{
25914 // // lea should only be emitted when beneficial
25915 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25916 // // increment preceded by register-register move
25917 // peepmatch ( incI_rReg movI );
25918 // // require that the destination register of the increment
25919 // // match the destination register of the move
25920 // peepconstraint ( 0.dst == 1.dst );
25921 // // construct a replacement instruction that sets
25922 // // the destination to ( move's source register + one )
25923 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25924 // %}
25925 //
25926 // 2. Procedural replacement
//    - More flexible finding of relevant nodes
25928 // - More flexible constraints
25929 // - More flexible transformations
25930 // - May utilise architecture-dependent API more effectively
25931 // - Currently only one replacement instruction due to adlc parsing capabilities
25932 //
25933 // // Change (inc mov) to lea
25934 // peephole %{
25935 // // lea should only be emitted when beneficial
25936 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25937 // // the rule numbers of these nodes inside are passed into the function below
25938 // peepmatch ( incI_rReg movI );
25939 // // the method that takes the responsibility of transformation
25940 // peepprocedure ( inc_mov_to_lea );
25941 // // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
25942 // // node is passed into the function above
25943 // peepreplace ( leaI_rReg_immI() );
25944 // %}
25945
// These instructions are not matched by the matcher but are used by the peephole rules
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    if (lhs == rbp || lhs == r13) {
      // lhs is not used as the base register here; swap the roles so that rhs
      // is the base and lhs the index. NOTE(review): presumably because
      // rbp/r13 as a base forces a displacement in the encoding - confirm.
      assert(rhs != rbp && rhs != r13, "");
      __ leal(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leal(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25965
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    // The immediate becomes the displacement of the address expression.
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25976
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register operand = $src$$Register;
    // The shift count is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || operand == rbp || operand == r13) {
      // General form: no base register, operand as the scaled index.
      __ leal(result, Address(noreg, operand, factor));
    } else {
      // Doubling of a register other than rbp/r13: encode as operand + operand.
      __ leal(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25993
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    if (lhs == rbp || lhs == r13) {
      // lhs is not used as the base register here; swap the roles so that rhs
      // is the base and lhs the index. NOTE(review): presumably because
      // rbp/r13 as a base forces a displacement in the encoding - confirm.
      assert(rhs != rbp && rhs != r13, "");
      __ leaq(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leaq(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
26012
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    // The immediate becomes the displacement of the address expression.
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
26023
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  // predicate(false) keeps the matcher from ever selecting this rule.
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register operand = $src$$Register;
    // The shift count is reused directly as the address scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || operand == rbp || operand == r13) {
      // General form: no base register, operand as the scaled index.
      __ leaq(result, Address(noreg, operand, factor));
    } else {
      // Doubling of a register other than rbp/r13: encode as operand + operand.
      __ leaq(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
26040
26041 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
26042 // sal}) with lea instructions. The {add, sal} rules are beneficial in
26043 // processors with at least partial ALU support for lea
26044 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
26045 // beneficial for processors with full ALU support
26046 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
26047
// int register-register add -> leal.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_reg using leaI_rReg_rReg_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addI_rReg);
  peepprocedure(lea_coalesce_reg);
  peepreplace(leaI_rReg_rReg_peep());
%}
26055
// int register-immediate add -> leal.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_imm using leaI_rReg_immI_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addI_rReg_imm);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
26063
// int increment -> leal.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake; the
// rewrite is performed by lea_coalesce_imm using leaI_rReg_immI_peep.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch(incI_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
26072
// int decrement -> leal.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake; the
// rewrite is performed by lea_coalesce_imm using leaI_rReg_immI_peep.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch(decI_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
26081
// int left shift by an immI2 constant -> leal.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_imm using leaI_rReg_immI2_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(salI_rReg_immI2);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI2_peep());
%}
26089
// long register-register add -> leaq.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_reg using leaL_rReg_rReg_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addL_rReg);
  peepprocedure(lea_coalesce_reg);
  peepreplace(leaL_rReg_rReg_peep());
%}
26097
// long register-immediate add -> leaq.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_imm using leaL_rReg_immL32_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addL_rReg_imm);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
26105
// long increment -> leaq.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake; the
// rewrite is performed by lea_coalesce_imm using leaL_rReg_immL32_peep.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch(incL_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
26114
// long decrement -> leaq.
// Enabled only with fast 3-operand lea support or on Intel Cascade Lake; the
// rewrite is performed by lea_coalesce_imm using leaL_rReg_immL32_peep.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch(decL_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
26123
// long left shift by an immI2 constant -> leaq.
// Enabled when VM_Version::supports_fast_2op_lea() holds; the rewrite is
// performed by lea_coalesce_imm using leaL_rReg_immI2_peep as replacement.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(salL_rReg_immI2);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immI2_peep());
%}
26131
// leaPCompressedOopOffset is handed to lea_remove_redundant.
// This rule has no predicate and names no replacement instruction.
peephole %{
  peepmatch(leaPCompressedOopOffset);
  peepprocedure(lea_remove_redundant);
%}
26137
// leaP8Narrow is handed to lea_remove_redundant.
// This rule has no predicate and names no replacement instruction.
peephole %{
  peepmatch(leaP8Narrow);
  peepprocedure(lea_remove_redundant);
%}
26143
// leaP32Narrow is handed to lea_remove_redundant.
// This rule has no predicate and names no replacement instruction.
peephole %{
  peepmatch(leaP32Narrow);
  peepprocedure(lea_remove_redundant);
%}
26149
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
26152
26153 //int variant
peephole %{
  // testI_reg is handed to test_may_remove; no predicate, no replacement.
  peepmatch(testI_reg);
  peepprocedure(test_may_remove);
%}
26159
26160 //long variant
peephole %{
  // testL_reg is handed to test_may_remove; no predicate, no replacement.
  peepmatch(testL_reg);
  peepprocedure(test_may_remove);
%}
26166
26167
26168 //----------SMARTSPILL RULES---------------------------------------------------
26169 // These must follow all instruction definitions as they use the names
26170 // defined in the instructions definitions.