1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers
// (Windows ABI only -- on other platforms they remain SOC; see the
// _WIN64 conditional below).
69
// Each 64-bit GPR is described as two 32-bit halves (FOO and FOO_H) that
// share one encoding number; FOO_H is the upper half (->next() of the
// VMReg).  Per the table above, the first column is the register save
// type and the second is the C-convention save type.
reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

// RBX is callee-saved under the C convention (SOE in column 2) but
// treated as save-on-call for compiled code (see note above).
reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

// RSP is the stack pointer: no-save in both conventions.
reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

// Windows x64 treats RSI/RDI as callee-saved (SOE in the C column);
// the non-Windows ABI does not, so there they are plain SOC.
#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif
106
// R8-R31 follow the same two-half pattern as above (encodings 8-31, all
// REX-encoded per the note above).  R12-R15 are callee-saved under the C
// convention (SOE in column 2); R16-R31 are plain save-on-call scratch
// registers (presumably the APX extended GPRs -- confirm against the
// register declarations this file pairs with).
reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// Allocation priority for the general registers, highest first (see the
// heuristic note above): pure scratch SOC registers lead, the
// C-callee-saved and calling-convention registers follow, and RSP --
// the stack pointer -- comes last.  Halves are listed together so pairs
// stay on even boundaries.
alloc_class chunk0(R10, R10_H,
R11, R11_H,
R8, R8_H,
R9, R9_H,
R12, R12_H,
RCX, RCX_H,
RBX, RBX_H,
RDI, RDI_H,
RDX, RDX_H,
RSI, RSI_H,
RAX, RAX_H,
RBP, RBP_H,
R13, R13_H,
R14, R14_H,
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,
R19, R19_H,
R20, R20_H,
R21, R21_H,
R22, R22_H,
R23, R23_H,
R24, R24_H,
R25, R25_H,
R26, R26_H,
R27, R27_H,
R28, R28_H,
R29, R29_H,
R30, R30_H,
R31, R31_H,
RSP, RSP_H);
221
// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
// Linux ABI: no registers are preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// XMM0-XMM7: each reg_def names one 32-bit slot of the 512-bit register.
// Slot k is xmmN->as_VMReg()->next(k); suffixes b..p cover slots 1..15
// (16 x 32 bits = 512 bits).  All slots are save-on-call.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
// XMM8-XMM15: same 16-slot layout as XMM0-XMM7; these encodings require
// REX (VEX/EVEX under AVX) -- see the encoding note above.
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
// XMM16 and up: same 16-slot layout; per the note above these encodings
// are only reachable with EVEX (AVX-512-capable hardware).
reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Condition-code register.  It has no VM register backing (VMRegImpl::Bad())
// and ideal type 0: it is tracked only so the allocator knows when flags are
// clobbered, never allocated as a value register.
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
// Each 64-bit opmask register is modeled as two 32-bit VMReg halves (base +
// _H).  k0 is deliberately not defined here — presumably because k0 encodes
// "no masking" in EVEX instructions and so cannot hold an allocatable mask;
// confirm against the matching register declarations elsewhere in the VM.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Empty register class (no registers may be allocated from it).
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
// Every 64-bit GPR is modeled as two 32-bit slots (base + _H).  Note that
// RSP is included here: this class describes the full machine register set,
// not the allocatable subset (allocatable classes below exclude RSP).
// R16..R31 are the Intel APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);
849
850 // Class for all int registers including APX extended GPRs.
851 reg_class all_int_reg(RAX
852 RDX,
853 RBP,
854 RDI,
855 RSI,
856 RCX,
857 RBX,
858 R8,
859 R9,
860 R10,
861 R11,
862 R12,
863 R13,
864 R14,
865 R16,
866 R17,
867 R18,
868 R19,
869 R20,
870 R21,
871 R22,
872 R23,
873 R24,
874 R25,
875 R26,
876 R27,
877 R28,
878 R29,
879 R30,
880 R31);
881
882 // Class for all pointer registers
// The following classes are defined by C++ bodies (%{ ... %}) that return a
// precomputed RegMask rather than by a static register list.  The _*_mask
// variables are defined and initialized elsewhere in this file — presumably
// so the masks can depend on runtime CPU features (e.g. APX availability);
// confirm against the mask-initialization code.

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
946
// Singleton register classes: instructions that require one specific register
// (e.g. implicit-operand instructions, calling-convention slots) match these.
// Pointer/long singletons list both 32-bit halves; int singletons list only
// the low half.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);
997
// Allocation chunks group registers for the allocator; the order of the
// registers within a chunk defines the allocator's preference order.
// chunk1: all XMM/ZMM slots (XMM0..XMM31, 16 slots each).
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// chunk2: opmask registers, listed high-to-low (K7 first) — presumably so the
// allocator prefers the higher-numbered masks first; confirm intent.
alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

// All allocatable AVX-512 opmask registers (k1..k7).
reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

// Singleton classes for instructions that need one specific mask register.
reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
1060
// Float (32-bit, one slot per register) classes.
// "legacy" = XMM0..XMM15 only (registers encodable without EVEX);
// "evex"   = XMM0..XMM31.  The reg_class_dynamic below picks one at runtime
// based on CPU feature detection.

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

// float_reg_vl additionally requires AVX512VL, since scalar/short-vector ops
// on XMM16+ need the VL extension.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Double (64-bit, two slots: base + b) classes; legacy = XMM0..15,
// evex = XMM0..31, selected dynamically below.

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b,
                            XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b,
                          XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b);

// double_reg_vl additionally requires AVX512VL (see float_reg_vl above).
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// 32-bit vector (one slot) classes; legacy = XMM0..15, evex = XMM0..31.

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7,
                             XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31);

// _vlbwdq variant gates on the combined AVX512 VL+BW+DQ feature check.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// 64-bit vector (two slots: base + b) classes; legacy = XMM0..15,
// evex = XMM0..31.

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b,
                             XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b);

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b,
                           XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b);

// Runtime selection between the legacy and EVEX variants (see above).
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// 128-bit vector (four slots: base + b..d) classes; legacy = XMM0..15,
// evex = XMM0..31.

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d);

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d);

// Runtime selection between the legacy and EVEX variants (see above).
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for all 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for all 256bit vector registers (EVEX encodings: XMM0-XMM31)
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// 256-bit vector class: EVEX set when supported, otherwise legacy XMM0-15.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
// Variant gated on AVX-512 VL+BW+DQ support.
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers (EVEX encodings: XMM0-XMM31)
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers (legacy subset: XMM0-XMM15)
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// 512-bit vector class: EVEX set when supported, otherwise legacy XMM0-15.
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Dedicated class pinning a 128-bit value to XMM0 (presumably for encodings
// that require XMM0 specifically — confirm at use sites).
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
// Register masks derived at VM startup by reg_mask_init() from the
// adlc-generated base masks, honoring flags such as PreserveFramePointer,
// UseCompressedOops and UseAPX.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Masks that also admit stack slots in addition to registers.
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Accessors for the stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
// Relocation-format shorthands used by the encoding blocks.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ masm->

// Storage for the derived register masks declared in the source_hpp block
// above; populated once by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
1524
// r12 is reserved as the compressed-oops heap base register whenever
// compressed oops are enabled, and must then be kept out of allocatable masks.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
1528
// One-time initialization of the derived register masks above. Runs after the
// adlc-generated masks (_ALL_REG_mask etc.) exist, so the subsets can honor
// PreserveFramePointer (rbp), UseCompressedOops (r12 heap base) and UseAPX
// (extended GPRs r16-r31).
void reg_mask_init() {
  // The 16 APX extended general-purpose registers; excluded below when !UseAPX.
  constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};

  // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  // We derive a number of subsets from it.
  _ANY_REG_mask.assignFrom(_ALL_REG_mask);

  if (PreserveFramePointer) {
    // rbp is dedicated to the frame pointer; remove both halves of the pair.
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  }
  if (need_r12_heapbase()) {
    // r12 holds the compressed-oops heap base.
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  }

  // Pointer registers: everything allocatable except rsp (stack pointer) and
  // r15 (thread register).
  _PTR_REG_mask.assignFrom(_ANY_REG_mask);
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  if (!UseAPX) {
    for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
      _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
      _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
    }
  }

  _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
  _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
  _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));

  _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
  _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));

  _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
  _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));


  // Longs use the same register set as pointers.
  _LONG_REG_mask.assignFrom(_PTR_REG_mask);
  _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
  _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));

  _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));

  _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));

  // Int masks carry only one slot per register (note: no ->next() removals
  // below, unlike the 64-bit masks above).
  _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
  if (!UseAPX) {
    for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
      _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
    }
  }

  if (PreserveFramePointer) {
    _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  }
  if (need_r12_heapbase()) {
    _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  }

  _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
  _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));

  _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));

  _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));

  // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  // from the float_reg_legacy/float_reg_evex register class.
  _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
}
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 if (_entry_point == nullptr) {
1653 // CallLeafNoFPInDirect
1654 return 3; // callq (register)
1655 }
1656 int offset = 13; // movq r10,#addr; callq (r10)
1657 if (this->ideal_Opcode() != Op_CallLeafVector) {
1658 offset += clear_avx_size();
1659 }
1660 return offset;
1661 }
1662
1663 //
1664 // Compute padding required for nodes which need alignment
1665 //
1666
1667 // The address of the call instruction needs to be 4-byte aligned to
1668 // ensure that it does not span a cache line so that it can be patched.
1669 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1670 {
1671 current_offset += clear_avx_size(); // skip vzeroupper
1672 current_offset += 1; // skip call opcode byte
1673 return align_up(current_offset, alignment_required()) - current_offset;
1674 }
1675
1676 // The address of the call instruction needs to be 4-byte aligned to
1677 // ensure that it does not span a cache line so that it can be patched.
1678 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1679 {
1680 current_offset += clear_avx_size(); // skip vzeroupper
1681 current_offset += 11; // skip movq instruction + call opcode byte
1682 return align_up(current_offset, alignment_required()) - current_offset;
1683 }
1684
// This could be in MacroAssembler but it's fairly C2 specific
// Canonicalize EFLAGS after a comiss/ucomiss-style compare so that an
// unordered result (NaN input, PF=1) reads as 'less than' (CF set, ZF clear).
static void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit); // ordered result: flags already correct
  __ pushf();                         // edit the saved flags image on the stack
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //  7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  //  0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andq(Address(rsp, 0), 0xffffff2b); // keep only bits allowed by 0x2B mask
  __ popf();
  __ bind(exit);
}
1705
// Materialize a three-way compare result in 'dst' (-1 below/unordered,
// 0 equal, 1 above) from the flags left by a preceding float compare.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // If any floating point comparison instruction is used, unordered case always triggers jump
  // for below condition, CF=1 is true when at least one input is NaN
  Label done;
  __ movl(dst, -1);                   // 'below' (or NaN/unordered) result
  __ jcc(Assembler::below, done);
  __ setcc(Assembler::notEqual, dst); // 0 if equal, 1 otherwise (above)
  __ bind(done);
}
1715
1716 // Math.min() # Math.max()
1717 // --------------------------
1718 // ucomis[s/d] #
1719 // ja -> b # a
1720 // jp -> NaN # NaN
1721 // jb -> a # b
1722 // je #
1723 // |-jz -> a | b # a & b
1724 // | -> a #
// Emit IEEE-correct Math.min/Math.max for float (single=true) or double:
// dst = min/max(a, b), using xmmt and rt as scratch. Branch structure follows
// the flag table in the comment above.
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b,
                            XMMRegister xmmt, Register rt,
                            bool min, bool single) {

  Label nan, zero, below, above, done;

  // Unordered compare of a with b sets CF/ZF/PF as per the table above.
  if (single)
    __ ucomiss(a, b);
  else
    __ ucomisd(a, b);

  if (dst->encoding() != (min ? b : a)->encoding())
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  else
    __ jccb(Assembler::above, done); // 'above' answer already lives in dst

  __ jccb(Assembler::parity, nan); // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  if (single) {
    __ ucomiss(a, xmmt); // distinguish +/-0 (bit-merge case) from other equals
    __ jccb(Assembler::equal, zero);

    __ movflt(dst, a);
    __ jmp(done);
  }
  else {
    __ ucomisd(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movdbl(dst, a);
    __ jmp(done);
  }

  __ bind(zero);
  // Both inputs are zero: OR the sign bits for min (so -0 wins) and AND them
  // for max (so +0 wins).
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);

  __ jmp(done);

  __ bind(above);
  if (single)
    __ movflt(dst, min ? b : a);
  else
    __ movdbl(dst, min ? b : a);

  __ jmp(done);

  __ bind(nan);
  // Unordered: produce the canonical NaN bit pattern.
  if (single) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  }
  else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  __ bind(below);
  if (single)
    __ movflt(dst, min ? a : b);
  else
    __ movdbl(dst, min ? a : b);

  __ bind(done);
}
1797
1798 //=============================================================================
// MachConstantBaseNode emits no code here (constants are addressed
// absolutely), so it defines no output register.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1800
// The constant table is reached with absolute addressing on x86_64, so the
// table base needs no bias.
int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}
1804
// No post-register-allocation expansion is needed on x86_64, so the expand
// hook below must never be reached.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1809
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// Emits nothing (see above), hence size zero.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
1817
#ifndef PRODUCT
// Debug-only listing; mirrors the (empty) encoding above.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
1823
1824
1825 //=============================================================================
#ifndef PRODUCT
// Debug-only textual listing of the prolog; should parallel the code emitted
// by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize; // rbp is pushed explicitly in this variant
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("pushq rbp\t# Save rbp");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("subq rsp, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("subq rsp, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize; // rbp is stored into the frame instead of pushed
    st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("addq rbp, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
#ifdef ASSERT
    st->print("\n\t");
    st->print("# stack alignment check");
#endif
  }
  if (C->stub_function() != nullptr) {
    // nmethod entry barrier pseudo-code (stub compilations only).
    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif
1885
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  // Frame construction is delegated to MacroAssembler::verified_entry.
  __ verified_entry(C);

  if (ra_->C->stub_function() == nullptr) {
    // Regular compiled method: emit the nmethod entry barrier.
    __ entry_barrier();
  }

  // Only bind the label on the real emission pass, not while measuring size.
  if (!Compile::current()->output()->in_scratch_emit_size()) {
    __ bind(*_verified_entry);
  }

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1908
1909
// Conservative count of relocation entries the prolog may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1914
1915 //=============================================================================
#ifndef PRODUCT
// Debug-only textual listing of the epilog; should parallel
// MachEpilogNode::emit below.
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  if (framesize) {
    st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
    st->print("\t");
  }

  st->print_cr("popq rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
                 "ja #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif
1945
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  __ remove_frame(initial_framesize, C->needs_stack_repair());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // Return-point safepoint poll; the slow path branches to a generated stub.
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only register a real stub on the actual emission pass.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1976
// Conservative count of relocation entries the epilog may emit.
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

// Use the generic machine-node pipeline description.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
1986
1987 //=============================================================================
1988
// Broad register classes used by the spill-copy logic below.
enum RC {
  rc_bad,   // not a valid register/slot
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (K) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
1996
1997 static enum RC rc_class(OptoReg::Name reg)
1998 {
1999 if( !OptoReg::is_valid(reg) ) return rc_bad;
2000
2001 if (OptoReg::is_stack(reg)) return rc_stack;
2002
2003 VMReg r = OptoReg::as_VMReg(reg);
2004
2005 if (r->is_Register()) return rc_int;
2006
2007 if (r->is_KRegister()) return rc_kreg;
2008
2009 assert(r->is_XMMRegister(), "must be");
2010 return rc_float;
2011 }
2012
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Register-to-register vector move.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector load from / store to a stack slot; 'is_load' selects the direction.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
2019
// Copy a vector value between two stack slots. x86 has no memory-to-memory
// vector move, so rax / xmm0 are used as scratch, preserved just below rsp.
// When masm == nullptr, prints the equivalent listing to 'st' instead
// (debug builds only).
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS: // 32 bits: bounce through rax
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD: // 64 bits: push/pop needs no scratch register
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX: // 128 bits: two 64-bit push/pop pairs
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY: // 256 bits: bounce through xmm0/ymm0
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512 bits: bounce through xmm0/zmm0
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Listing-only path; text mirrors the instruction sequences above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl rax, [rsp + #%d]\n\t"
                "movl [rsp + #%d], rax\n\t"
                "movq rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecX:
      st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushq [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
2097
// Emit (when 'masm' is non-null) or pretty-print (otherwise, non-PRODUCT only,
// to 'st') the instructions for a register-allocator spill copy: moving a
// value between any combination of stack slots, general-purpose registers,
// XMM registers and AVX-512 opmask (k) registers, in 32-bit or 64-bit width.
// Vector copies are delegated to the vec_* helpers.  'do_size' is unused on
// this platform (the node size comes from MachNode::size()).  Returns 0.
uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector spill copies (but not vector-mask copies) are handled by helpers.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  // Scalar copy: dispatch on the (source class, destination class) pair.
  // The "(x & 1) == 0 && x + 1 == x_second" test recognizes an aligned
  // adjacent register pair, i.e. a 64-bit value; otherwise the copy is 32-bit.
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq [rsp + #%d]",
                    src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // rax is saved below rsp (red zone) and restored after the bounce.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl rax, [rsp + #%d]\n\t"
                    "movl [rsp + #%d], rax\n\t"
                    "movq rax, [rsp - #8]",
                    src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s %s, [rsp + #%d]\t# spill",
                    UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): this calls Unimplemented() unconditionally, even after
      // a kmov was emitted above — confirm whether gpr -> kreg spill copies
      // are really expected to be unreachable here.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq [rsp + #%d] , %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): unconditional Unimplemented() after possibly emitting a
      // kmov above — verify kreg -> gpr spill copies are meant to be rejected.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No (src class, dst class) combination matched above.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
2534
2535 #ifndef PRODUCT
// Print the textual form of this spill copy (masm == nullptr selects the
// print path inside implementation()).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(nullptr, ra_, false, st);
}
2539 #endif
2540
// Emit the spill-copy instructions into the code buffer.
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation(masm, ra_, false, nullptr);
}
2544
// Size in bytes of the emitted copy; falls back to the generic
// scratch-buffer measurement in MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2548
2549 //=============================================================================
2550 #ifndef PRODUCT
// Print the lea that materializes the address of this node's lock box
// (a stack slot) into its assigned register.
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("leaq %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
2558 #endif
2559
// Emit lea reg, [rsp + offset] computing the address of the lock-box stack
// slot assigned to this node.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2567
2568 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2569 {
2570 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2571 if (ra_->get_encode(this) > 15) {
2572 return (offset < 0x80) ? 6 : 9; // REX2
2573 } else {
2574 return (offset < 0x80) ? 5 : 8; // REX
2575 }
2576 }
2577
2578 //=============================================================================
2579 #ifndef PRODUCT
// Minimal textual form for the (inline-type) value entry point node.
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVEPNode");
}
2584 #endif
2585
// Emit the value entry point: either an inline-cache check (unverified case)
// or, for the verified case, code that unpacks inline-type arguments passed
// as oops and jumps to the verified entry point.
void MachVEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  CodeBuffer* cbuf = masm->code();
  uint insts_size = cbuf->insts_size();  // remember start to compute emitted size below
  if (!_verified) {
    __ ic_check(1);
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a temporary frame so the entry barrier can run, then tear it
      // down again before unpacking the arguments.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // During size measurement the real label is not available; jump to a
      // local dummy of the same (short/long) shape instead.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf->insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
2620
2621 //=============================================================================
2622 #ifndef PRODUCT
2623 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2624 {
2625 if (UseCompressedClassPointers) {
2626 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2627 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2628 } else {
2629 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2630 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2631 }
2632 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2633 }
2634 #endif
2635
// Emit the unverified entry point: an inline-cache check aligned to
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2640
2641
2642 //=============================================================================
2643
// Vector calling convention is available whenever the Vector API support
// flag is on.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2647
2648 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2649 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2650 }
2651
2652 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2653 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2654 }
2655
2656 #ifdef ASSERT
2657 static bool is_ndd_demotable(const MachNode* mdef) {
2658 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2659 }
2660 #endif
2661
// Returns true when operand 'oper_index' (1 or 2) of 'mdef' may be biased to
// share the register assigned to the node's definition.  When that biasing
// succeeds, the assembler can demote an Intel APX NDD (new-data-destination)
// instruction from its 4-byte extended EVEX prefix to a shorter legacy
// REX/REX2 encoding (see Flag_ndd_demotable_opr1/2).
bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
                                            int oper_index) {
  if (mdef == nullptr) {
    return false;
  }

  // Operand must exist and have a live incoming edge; a demotable flag on a
  // node that fails this check would indicate a selector bug.
  if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
      mdef->in(mdef->operand_index(oper_index)) == nullptr) {
    assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
    assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
    return false;
  }

  // Complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing def towards any address component will not
  // result in NDD demotion by assembler.
  if (mdef->operand_num_edges(oper_index) != 1) {
    return false;
  }

  // Demotion candidate must be register mask compatible with definition.
  const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
  if (!oper_mask.overlap(mdef->out_RegMask())) {
    assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
    return false;
  }

  switch (oper_index) {
  // First operand of MachNode corresponding to Intel APX NDD selection
  // pattern can share its assigned register with definition operand if
  // their live ranges do not overlap. In such a scenario we can demote
  // it to legacy map0/map1 instruction by replacing its 4-byte extended
  // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
  // are decorated with a special flag by instruction selector.
    case 1:
      return is_ndd_demotable_opr1(mdef);

  // Definition operand of commutative operation can be biased towards second
  // operand.
    case 2:
      return is_ndd_demotable_opr2(mdef);

  // Current scheme only selects up to two biasing candidates
    default:
      assert(false, "unhandled operand index: %s", mdef->Name());
      break;
  }

  return false;
}
2712
2713 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2714 assert(EnableVectorSupport, "sanity");
2715 int lo = XMM0_num;
2716 int hi = XMM0b_num;
2717 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2718 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2719 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2720 return OptoRegPair(hi, lo);
2721 }
2722
2723 // Is this branch offset short enough that a short branch can be used?
2724 //
2725 // NOTE: If the platform does not provide any short branch variants, then
2726 // this method should return false for offset 0.
2727 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2728 // The passed offset is relative to address of the branch.
2729 // On 86 a branch displacement is calculated relative to address
2730 // of a next instruction.
2731 offset -= br_size;
2732
2733 // the short version of jmpConUCF2 contains multiple branches,
2734 // making the reach slightly less
2735 if (rule == jmpConUCF2_rule)
2736 return (-126 <= offset && offset <= 125);
2737 return (-128 <= offset && offset <= 127);
2738 }
2739
2740 // Return whether or not this register is ever used as an argument.
2741 // This function is used on startup to build the trampoline stubs in
2742 // generateOptoStub. Registers not mentioned will be killed by the VM
2743 // call in the trampoline, and arguments in those registers not be
2744 // available to the callee.
2745 bool Matcher::can_be_java_arg(int reg)
2746 {
2747 return
2748 reg == RDI_num || reg == RDI_H_num ||
2749 reg == RSI_num || reg == RSI_H_num ||
2750 reg == RDX_num || reg == RDX_H_num ||
2751 reg == RCX_num || reg == RCX_H_num ||
2752 reg == R8_num || reg == R8_H_num ||
2753 reg == R9_num || reg == R9_H_num ||
2754 reg == R12_num || reg == R12_H_num ||
2755 reg == XMM0_num || reg == XMM0b_num ||
2756 reg == XMM1_num || reg == XMM1b_num ||
2757 reg == XMM2_num || reg == XMM2b_num ||
2758 reg == XMM3_num || reg == XMM3b_num ||
2759 reg == XMM4_num || reg == XMM4b_num ||
2760 reg == XMM5_num || reg == XMM5b_num ||
2761 reg == XMM6_num || reg == XMM6b_num ||
2762 reg == XMM7_num || reg == XMM7b_num;
2763 }
2764
// An argument register is spillable exactly when it can carry a Java argument.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2769
2770 uint Matcher::int_pressure_limit()
2771 {
2772 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2773 }
2774
2775 uint Matcher::float_pressure_limit()
2776 {
2777 // After experiment around with different values, the following default threshold
2778 // works best for LCM's register pressure scheduling on x64.
2779 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2780 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2781 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2782 }
2783
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL), so never ask for the asm version.
  return false;
}
2790
2791 // Register for DIVI projection of divmodI
// x86 DIV/IDIV fix the quotient in RAX and the remainder in RDX, so the
// div/mod projections of the fused divmod nodes are pinned accordingly.
const RegMask& Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}

// Register for MODI projection of divmodI
const RegMask& Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}

// Register for DIVL projection of divmodL
const RegMask& Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}

// Register for MODL projection of divmodL
const RegMask& Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}
2810
2811 %}
2812
2813 source_hpp %{
2814 // Header information of the source block.
2815 // Method declarations/definitions which are used outside
2816 // the ad-scope can conveniently be defined here.
2817 //
2818 // To keep related declarations/definitions/uses close together,
2819 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2820
2821 #include "runtime/vm_version.hpp"
2822
2823 class NativeJump;
2824
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // x86_64 calls reach the whole code cache directly, so no trampolines
  // are ever emitted and both sizes are zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
2842
class HandlerImpl {

 public:

  static int emit_deopt_handler(C2_MacroAssembler* masm);

  // Size (bytes) reserved for the deopt handler stub emitted by
  // emit_deopt_handler(): one call and one jmp.
  static uint size_deopt_handler() {
    // one call and one jmp.
    return 7;
  }
};
2854
2855 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2856 switch(bytes) {
2857 case 4: // fall-through
2858 case 8: // fall-through
2859 case 16: return Assembler::AVX_128bit;
2860 case 32: return Assembler::AVX_256bit;
2861 case 64: return Assembler::AVX_512bit;
2862
2863 default: {
2864 ShouldNotReachHere();
2865 return Assembler::AVX_NoVec;
2866 }
2867 }
2868 }
2869
// Vector-length encoding for a node, derived from its matched vector width.
static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}
2873
2874 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2875 uint def_idx = use->operand_index(opnd);
2876 Node* def = use->in(def_idx);
2877 return vector_length_encoding(def);
2878 }
2879
2880 static inline bool is_vector_popcount_predicate(BasicType bt) {
2881 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2882 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2883 }
2884
2885 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2886 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2887 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2888 }
2889
// Platform-dependent node flags for x86.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Node needs padding to mitigate the Intel JCC erratum
    // (see MachNode::compute_padding / pd_alignment_required below).
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // EFLAGS effects of the instruction: which condition flags it sets...
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    // ...and which it clears.
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // APX NDD instructions whose operand 1/2 may be register-biased toward
    // the definition, allowing demotion to a shorter legacy encoding
    // (see Matcher::is_register_biasing_candidate above).
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
2909
2910 %} // end source_hpp
2911
2912 source %{
2913
2914 #include "opto/addnode.hpp"
2915 #include "c2_intelJccErratum_x86.hpp"
2916
// Platform hook run before code emission: on CPUs affected by the Intel JCC
// erratum, tag the affected mach nodes and grow the code-buffer size estimate
// by the worst-case padding they may need.
void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}
2923
2924 int MachNode::pd_alignment_required() const {
2925 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2926 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2927 return IntelJccErratum::largest_jcc_size() + 1;
2928 } else {
2929 return 1;
2930 }
2931 }
2932
2933 int MachNode::compute_padding(int current_offset) const {
2934 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2935 Compile* C = Compile::current();
2936 PhaseOutput* output = C->output();
2937 Block* block = output->block();
2938 int index = output->index();
2939 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2940 } else {
2941 return 0;
2942 }
2943 }
2944
2945 // Emit deopt handler code.
// Emit deopt handler code.
// Layout: a call to the deopt blob's unpack entry, followed by a jmp back to
// that call.  The returned offset is the handler entry (the jmp), so entering
// the handler executes jmp -> call, leaving the call's return address on the
// stack for the unpack blob.  Returns 0 on CodeCache exhaustion.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  Label start;
  __ bind(start);

  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  int entry_offset = __ offset();

  __ jmp(start);

  // The stub must fit the size reserved by size_deopt_handler(), and the
  // entry must leave enough trailing bytes for the post-call NOP check.
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
         "out of bounds read in post-call NOP check");
  __ end_a_stub();
  return entry_offset;
}
2972
2973 static Assembler::Width widthForType(BasicType bt) {
2974 if (bt == T_BYTE) {
2975 return Assembler::B;
2976 } else if (bt == T_SHORT) {
2977 return Assembler::W;
2978 } else if (bt == T_INT) {
2979 return Assembler::D;
2980 } else {
2981 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2982 return Assembler::Q;
2983 }
2984 }
2985
2986 //=============================================================================
2987
2988 // Float masks come from different places depending on platform.
// Thin accessors so ADLC-generated encodings can reference the stub-generated
// constant tables by a short name.
static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
3008
3009 //=============================================================================
// Answers whether a match rule exists for 'opcode' and whether the current
// CPU provides the features its encodings need.  Only vector-length
// independent constraints are checked here (UseSSE/UseAVX levels and
// VM_Version feature probes); vector-length/element-type dependent limits
// are checked by Matcher::match_rule_supported_vector() below.
bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    case Op_MaxHF:
    case Op_MinHF:
      // Max/MinHF need AVX512VL+BW on top of the common FP16 check below.
      if (!VM_Version::supports_avx512vlbw()) {
        return false;
      } // fallthrough
    case Op_AddHF:
    case Op_DivHF:
    case Op_FmaHF:
    case Op_MulHF:
    case Op_ReinterpretS2HF:
    case Op_ReinterpretHF2S:
    case Op_SubHF:
    case Op_SqrtHF:
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_IsInfiniteF:
    case Op_IsInfiniteD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
    case Op_VectorUCastB2X:
    case Op_VectorUCastS2X:
    case Op_VectorUCastI2X:
    case Op_VectorMaskCast:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_PopulateIndex:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_RoundVF:
      if (UseAVX < 2) { // enabled for AVX2 only
        return false;
      }
      break;
    case Op_RoundVD:
      if (UseAVX < 3) {
        return false;  // enabled for AVX3 only
      }
      break;
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
      // No additional CPU feature needed beyond baseline x86_64.
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
    case Op_LoadVectorGatherMasked:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaF:
    case Op_FmaD:
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
    case Op_VectorMaskToLong:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_RoundF:
    case Op_RoundD:
      // No additional CPU feature needed beyond baseline x86_64.
      break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3) {
        return false;
      }
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_CompressBits:
    case Op_ExpandBits:
      if (!VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_ConvF2HF:
    case Op_ConvHF2F:
      if (!VM_Version::supports_float16()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
        return false;
      }
      break;
  }
  return true;  // Match rules are supported by default.
}
3257
3258 //------------------------------------------------------------------------
3259
3260 static inline bool is_pop_count_instr_target(BasicType bt) {
3261 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3262 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3263 }
3264
3265 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3266 return match_rule_supported_vector(opcode, vlen, bt);
3267 }
3268
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
// Assumes match_rule_supported(opcode) already holds; adds restrictions that
// depend on the concrete vector size (vlen * element size) and element type.
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_MaxVHF:
    case Op_MinVHF:
      // Max/MinVHF additionally need AVX512BW before the common FP16 checks.
      if (!VM_Version::supports_avx512bw()) {
        return false;
      } // fallthrough
    case Op_AddVHF:
    case Op_DivVHF:
    case Op_FmaVHF:
    case Op_MulVHF:
    case Op_SubVHF:
    case Op_SqrtVHF:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
      break;
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
      break;
    case Op_VectorBlend:
      if (UseAVX == 0 && size_in_bits < 128) {
        return false;
      }
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (is_subword_type(bt) &&
         ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
          (size_in_bits < 64) ||
          (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
       if (size_in_bits < 128) {
         return false;
       }
       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
         return false;
       }
       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
         return false;
       }
       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
         return false;
       }
       if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
         return false;
       }
       break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      } // fallthrough: Compress/ExpandV share the AVX/BMI2 checks below
    case Op_VectorLongToMask:
      if (UseAVX < 1) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true;  // Per default match rules are supported.
}
3604
// Answers whether the MASKED (predicated) form of the vector operation is
// supported for the given length and element type.
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // ADLC based match_rule_supported routine checks for the existence of pattern based
  // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
  // of their non-masked counterpart with mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  // Masked (predicated) operations need either a full 512-bit vector or
  // AVX512VL for the narrower widths.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch(opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      if(!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      if(bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512dq()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      return true;

    case Op_UMinV:
    case Op_UMaxV:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      } // fallthrough
    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
        return false; // Implementation limitation
      }
      return true;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (!is_subword_type(bt)) {
        return false;
      }
      if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      } // fallthrough to default (unsupported otherwise)
    default:
      return false;
  }
}
3776
// x86 never splits a vector operation into partial (tail) operations;
// unconditionally false on this platform.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
3780
3781 // Return true if Vector::rearrange needs preparation of the shuffle argument
3782 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3783 switch (elem_bt) {
3784 case T_BYTE: return false;
3785 case T_SHORT: return !VM_Version::supports_avx512bw();
3786 case T_INT: return !VM_Version::supports_avx();
3787 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3788 default:
3789 ShouldNotReachHere();
3790 return false;
3791 }
3792 }
3793
3794 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3795 // Prefer predicate if the mask type is "TypeVectMask".
3796 return vt->isa_vectmask() != nullptr;
3797 }
3798
3799 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3800 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3801 bool legacy = (generic_opnd->opcode() == LEGVEC);
3802 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3803 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3804 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3805 return new legVecZOper();
3806 }
3807 if (legacy) {
3808 switch (ideal_reg) {
3809 case Op_VecS: return new legVecSOper();
3810 case Op_VecD: return new legVecDOper();
3811 case Op_VecX: return new legVecXOper();
3812 case Op_VecY: return new legVecYOper();
3813 case Op_VecZ: return new legVecZOper();
3814 }
3815 } else {
3816 switch (ideal_reg) {
3817 case Op_VecS: return new vecSOper();
3818 case Op_VecD: return new vecDOper();
3819 case Op_VecX: return new vecXOper();
3820 case Op_VecY: return new vecYOper();
3821 case Op_VecZ: return new vecZOper();
3822 }
3823 }
3824 ShouldNotReachHere();
3825 return nullptr;
3826 }
3827
3828 bool Matcher::is_reg2reg_move(MachNode* m) {
3829 switch (m->rule()) {
3830 case MoveVec2Leg_rule:
3831 case MoveLeg2Vec_rule:
3832 case MoveF2VL_rule:
3833 case MoveF2LEG_rule:
3834 case MoveVL2F_rule:
3835 case MoveLEG2F_rule:
3836 case MoveD2VL_rule:
3837 case MoveD2LEG_rule:
3838 case MoveVL2D_rule:
3839 case MoveLEG2D_rule:
3840 return true;
3841 default:
3842 return false;
3843 }
3844 }
3845
3846 bool Matcher::is_generic_vector(MachOper* opnd) {
3847 switch (opnd->opcode()) {
3848 case VEC:
3849 case LEGVEC:
3850 return true;
3851 default:
3852 return false;
3853 }
3854 }
3855
3856 //------------------------------------------------------------------------
3857
// Register mask describing the predicate (opmask) register class used for
// vector masking.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}
3861
// Max vector size in bytes. 0 if not supported.
int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512, UseAVX > 2) supports 512bit vectors, with the per-type
  // restrictions applied below.
  // (1 << UseAVX) * 8: UseAVX=2 -> 32 bytes, UseAVX=3 -> 64 bytes.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Subword types need AVX512BW for full 512bit vectors.
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // All 1- and 2-byte element types require at least a 4-byte vector.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}
3903
// Limits on vector size (number of elements) loaded into vector.
// Max lane count: vector width in bytes divided by element size; 0 when
// vectors of 'bt' are not supported at all.
int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
3908 int Matcher::min_vector_size(const BasicType bt) {
3909 int max_size = max_vector_size(bt);
3910 // Min size which can be loaded into vector is 4 bytes.
3911 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3912 // Support for calling svml double64 vectors
3913 if (bt == T_DOUBLE) {
3914 size = 1;
3915 }
3916 return MIN2(size,max_size);
3917 }
3918
3919 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3920 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3921 // by default on Cascade Lake
3922 if (VM_Version::is_default_intel_cascade_lake()) {
3923 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3924 }
3925 return Matcher::max_vector_size(bt);
3926 }
3927
// x86 has no scalable (runtime-sized) vector registers; -1 means unsupported.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
3931
3932 // Vector ideal reg corresponding to specified size in bytes
3933 uint Matcher::vector_ideal_reg(int size) {
3934 assert(MaxVectorSize >= size, "");
3935 switch(size) {
3936 case 4: return Op_VecS;
3937 case 8: return Op_VecD;
3938 case 16: return Op_VecX;
3939 case 32: return Op_VecY;
3940 case 64: return Op_VecZ;
3941 }
3942 ShouldNotReachHere();
3943 return 0;
3944 }
3945
// Check for shift by small constant as well
// Tries to subsume 'shift' (an LShiftX by a constant <= 3, i.e. a scaled
// index) into an addressing expression.  On success the shift (and possibly
// a ConvI2L feeding it) is flagged as address_visited so it is cloned into
// each address use instead of being computed into a register.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else {
      mstack.push(conv, Matcher::Pre_Visit);
    }
    return true;
  }
  return false;
}
3971
3972 // This function identifies sub-graphs in which a 'load' node is
3973 // input to two different nodes, and such that it can be matched
3974 // with BMI instructions like blsi, blsr, etc.
3975 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3976 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3977 // refers to the same node.
3978 //
3979 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3980 // This is a temporary solution until we make DAGs expressible in ADL.
// Matches the fused pattern (op1 (op2 Con{ConType} mop) mop) where both
// occurrences of 'mop' refer to the same memory-operation node.  See the
// BMI pattern description above.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;  // candidate root of the pattern
  Node* _mop_node;  // the shared memory operation (load)
  int _con_op;      // opcode of the constant node (e.g. Op_ConI / Op_ConL)

  // Returns the input index (1 or 2) of 'n' whose opcode equals 'next_op',
  // or -1 when no input matches.  A 'next_op_idx' of -1 marks 'n' as
  // commutative, so both inputs are tried; otherwise only the given index.
  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  // Returns true iff the op1/op2/constant/memory-op graph rooted at
  // _op1_node matches the requested shape and the constant equals
  // 'con_value'.
  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    // The load may only feed the two pattern nodes (op1 and op2).
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      // op2's result must have op1 as its only use.
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};
4052
// Recognizes int/long graphs that can be matched to BMI1 instructions:
//   (And (Sub 0 x) x)  -> BLSI  (x & -x)
//   (And (Add x -1) x) -> BLSR  (x & (x - 1))
//   (Xor (Add x -1) x) -> BLSMSK (x ^ (x - 1))
// where x is the shared load node 'm'.
static bool is_bmi_pattern(Node* n, Node* m) {
  assert(UseBMI1Instructions, "sanity");
  if (n != nullptr && m != nullptr) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI, 1,  0)  ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL, 1,  0) ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}
4070
4071 // Should the matcher clone input 'm' of node 'n'?
4072 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4073 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4074 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4075 mstack.push(m, Visit);
4076 return true;
4077 }
4078 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4079 mstack.push(m, Visit); // m = ShiftCntV
4080 return true;
4081 }
4082 if (is_encode_and_store_pattern(n, m)) {
4083 mstack.push(m, Visit);
4084 return true;
4085 }
4086 return false;
4087 }
4088
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's components were pushed (as Visit/Pre_Visit)
// so they get folded into the address; false to match the AddP normally.
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      // Fold a small-constant shift (scaled index) into the address if possible.
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
4130
4131 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4132 switch (bt) {
4133 case BoolTest::eq:
4134 return Assembler::eq;
4135 case BoolTest::ne:
4136 return Assembler::neq;
4137 case BoolTest::le:
4138 case BoolTest::ule:
4139 return Assembler::le;
4140 case BoolTest::ge:
4141 case BoolTest::uge:
4142 return Assembler::nlt;
4143 case BoolTest::lt:
4144 case BoolTest::ult:
4145 return Assembler::lt;
4146 case BoolTest::gt:
4147 case BoolTest::ugt:
4148 return Assembler::nle;
4149 default : ShouldNotReachHere(); return Assembler::_false;
4150 }
4151 }
4152
4153 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4154 switch (bt) {
4155 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4156 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4157 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4158 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4159 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4160 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4161 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4162 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4163 }
4164 }
4165
4166 // Helper methods for MachSpillCopyNode::implementation().
// Emit (when 'masm' is non-null) or pretty-print (non-PRODUCT builds) a
// vector register-to-register move for a spill copy of width 'ireg'
// (Op_VecS .. Op_VecZ). Register pairs must be adjacent except for VecS.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (masm) {
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      // With AVX-512 but no AVX-512VL, the 128-bit move cannot encode the
      // extended (xmm16+) registers, so copy lane 0 via a 512-bit extract.
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecY:
      // Same VL consideration as above, using a 256-bit extract.
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecZ:
      // Full 512-bit move; vector length encoding 2 selects 512 bits.
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Printing path for -XX:+PrintAssembly style output; no code is emitted.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
4215
// Emit (when 'masm' is non-null) or pretty-print (non-PRODUCT builds) a
// vector spill between a register and a stack slot at [rsp + stack_offset].
// 'is_load' selects stack->register; otherwise register->stack.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st) {
  if (masm) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // AVX-512 without VL: the plain 128-bit load cannot encode the
          // extended registers. Zero the destination, then insert the
          // 128-bit payload from the stack into lane 0.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // Same as the VecX case, with a 256-bit insert.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // No VL: store lane 0 via a 128-bit extract-to-memory.
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // No VL: store the low 256 bits via an extract-to-memory.
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    // Printing path for disassembly-style output; no code is emitted.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
}
4322
4323 template <class T>
4324 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4325 int size = type2aelembytes(bt) * len;
4326 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4327 for (int i = 0; i < len; i++) {
4328 int offset = i * type2aelembytes(bt);
4329 switch (bt) {
4330 case T_BYTE: val->at(i) = con; break;
4331 case T_SHORT: {
4332 jshort c = con;
4333 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4334 break;
4335 }
4336 case T_INT: {
4337 jint c = con;
4338 memcpy(val->adr_at(offset), &c, sizeof(jint));
4339 break;
4340 }
4341 case T_LONG: {
4342 jlong c = con;
4343 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4344 break;
4345 }
4346 case T_FLOAT: {
4347 jfloat c = con;
4348 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4349 break;
4350 }
4351 case T_DOUBLE: {
4352 jdouble c = con;
4353 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4354 break;
4355 }
4356 default: assert(false, "%s", type2name(bt));
4357 }
4358 }
4359 return val;
4360 }
4361
4362 static inline jlong high_bit_set(BasicType bt) {
4363 switch (bt) {
4364 case T_BYTE: return 0x8080808080808080;
4365 case T_SHORT: return 0x8000800080008000;
4366 case T_INT: return 0x8000000080000000;
4367 case T_LONG: return 0x8000000000000000;
4368 default:
4369 ShouldNotReachHere();
4370 return 0;
4371 }
4372 }
4373
#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  // Debug-only textual form of the nop padding.
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
  // Emit '_count' bytes of nop padding.
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  // Code size is exactly the requested pad size.
  return _count;
}
4387
#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  // Debug-only textual form of the breakpoint.
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
  // int3 traps into the debugger.
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
4401
4402 %}
4403
4404 //----------ENCODING BLOCK-----------------------------------------------------
4405 // This block specifies the encoding classes used by the compiler to
4406 // output byte streams. Encoding classes are parameterized macros
4407 // used by Machine Instruction Nodes in order to generate the bit
4408 // encoding of the instruction. Operands specify their base encoding
4409 // interface with the interface keyword. There are currently
4410 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
4411 // COND_INTER. REG_INTER causes an operand to generate a function
4412 // which returns its register number when queried. CONST_INTER causes
4413 // an operand to generate a function which returns the value of the
4414 // constant when queried. MEMORY_INTER causes an operand to generate
4415 // four functions which return the Base Register, the Index Register,
4416 // the Scale Value, and the Offset Value of the operand when queried.
4417 // COND_INTER causes an operand to generate six functions which return
4418 // the encoding code (ie - encoding bits for the instruction)
4419 // associated with each basic boolean condition for a conditional
4420 // instruction.
4421 //
4422 // Instructions specify two basic values for encoding. Again, a
4423 // function is available to check if the constant displacement is an
4424 // oop. They use the ins_encode keyword to specify their encoding
4425 // classes (which must be a sequence of enc_class names, and their
4426 // parameters, specified in the encoding block), and they use the
4427 // opcode keyword to specify, in order, their primary, secondary, and
4428 // tertiary opcode. Only the opcode sections which a particular
4429 // instruction needs for encoding need to be specified.
4430 encode %{
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    // The special case avoids the #DE hardware fault that idiv would raise
    // on min_int / -1 (the only overflowing case).
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    // Code sequence:
    //
    //    0:   3d 00 00 00 80                cmp    $0x80000000,%eax
    //    5:   75 07/08                      jne    e <normal>
    //    7:   33 d2                         xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                      cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                      je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                            cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                         idiv   $div
    // 0000000000000011 <done>:
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
4487
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    // The special case avoids the #DE hardware fault that idiv would raise
    // on min_long / -1 (the only overflowing case).
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    // Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00          mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                      cmp    %rdx,%rax
    //    d:   75 08                         jne    17 <normal>
    //    f:   33 d2                         xor    %edx,%edx
    //   11:   48 83 f9 ff                   cmp    $0xffffffffffffffff,$div
    //   15:   74 05                         je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                         cqto
    //   19:   48 f7 f9                      idiv   $div
    // 000000000000001c <done>:
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
4545
  enc_class clear_avx %{
    DEBUG_ONLY(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    // Verify that the emitted size matches what clear_avx_size() predicted,
    // since instruction sizing elsewhere relies on that prediction.
    DEBUG_ONLY(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
4557
  enc_class Java_To_Runtime(method meth) %{
    // Call out to the VM runtime: materialize the target address in r10
    // (scratch) and call indirectly through it.
    __ lea(r10, RuntimeAddress((address)$meth$$method));
    __ call(r10);
    // Marker nop so the return pc can be recognized after the call.
    __ post_call_nop();
  %}
4563
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      // A runtime call (no Java method): plain direct call, no relocation
      // to a Java target and no interpreter stub needed.
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      // Real Java static (or optimized virtual) call: attach the proper
      // call relocation so the call site can be patched at resolution.
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // Bail out of this compilation; the code cache cannot hold the stub.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}
4599
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache call: emits the IC holder load plus the relocated call.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4604
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic() && _method->return_type()->is_loaded()) {
      // The last return value is not set by the callee but used to pass the null marker to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set null marker if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          // toReg = (rax != 0) ? 1 : 0
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            // Marker lives in a stack slot: spill the computed value there.
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
4650
4651 %}
4652
4653 //----------FRAME--------------------------------------------------------------
4654 // Definition of frame structure and management information.
4655 //
4656 // S T A C K L A Y O U T Allocators stack-slot number
4657 // | (to get allocators register number
4658 // G Owned by | | v add OptoReg::stack0())
4659 // r CALLER | |
4660 // o | +--------+ pad to even-align allocators stack-slot
4661 // w V | pad0 | numbers; owned by CALLER
4662 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4663 // h ^ | in | 5
4664 // | | args | 4 Holes in incoming args owned by SELF
4665 // | | | | 3
4666 // | | +--------+
4667 // V | | old out| Empty on Intel, window on Sparc
4668 // | old |preserve| Must be even aligned.
4669 // | SP-+--------+----> Matcher::_old_SP, even aligned
4670 // | | in | 3 area for Intel ret address
4671 // Owned by |preserve| Empty on Sparc.
4672 // SELF +--------+
4673 // | | pad2 | 2 pad to align old SP
4674 // | +--------+ 1
4675 // | | locks | 0
4676 // | +--------+----> OptoReg::stack0(), even aligned
4677 // | | pad1 | 11 pad to align new SP
4678 // | +--------+
4679 // | | | 10
4680 // | | spills | 9 spills
4681 // V | | 8 (pad0 slot for callee)
4682 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4683 // ^ | out | 7
4684 // | | args | 6 Holes in outgoing args owned by CALLEE
4685 // Owned by +--------+
4686 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4687 // | new |preserve| Must be even-aligned.
4688 // | SP-+--------+----> Matcher::_new_SP, even aligned
4689 // | | |
4690 //
4691 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4692 // known from SELF's arguments and the Java calling convention.
4693 // Region 6-7 is determined per call site.
4694 // Note 2: If the calling convention leaves holes in the incoming argument
4695 // area, those holes are owned by SELF. Holes in the outgoing area
4696 // are owned by the CALLEE. Holes should not be necessary in the
4697 // incoming area, as the Java calling convention is completely under
4698 // the control of the AD file. Doubles can be sorted and packed to
4699 // avoid holes. Holes in the outgoing arguments may be necessary for
4700 // varargs C calling conventions.
4701 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4702 // even aligned with pad0 as needed.
4703 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4704 // region 6-11 is even aligned; it may be padded out more so that
4705 // the region from SP to FP meets the minimum stack alignment.
4706 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4707 // alignment. Region 11, pad1, may be dynamically extended so that
4708 // SP meets the minimum alignment.
4709
frame
%{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX); // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C. Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address. Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values. Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo/hi give the first and second register of the return value pair,
    // indexed by ideal register type.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4780
4781 //----------ATTRIBUTES---------------------------------------------------------
4782 //----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires. If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
4802
4803 //----------OPERANDS-----------------------------------------------------------
4804 // Operand definitions must precede instruction definitions for correct parsing
4805 // in the ADLC because operands constitute user defined types which are used in
4806 // instruction definitions.
4807
4808 //----------Simple Operands----------------------------------------------------
4809 // Immediate Operands
4810 // Integer Immediate
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: the constant 2
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: the constant 4
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: the constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: unsigned 7-bit (0..0x7F)
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: signed 8-bit (-0x80..0x7F)
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: unsigned 8-bit (0..255)
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: signed 16-bit
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4943
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed oop) Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Klass Pointer Immediate
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Narrow Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate that fits in 31 bits and needs no relocation
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
5002
5003
// Long Immediate
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 8-bit
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit unsigned
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit signed
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: exactly one bit set (power of two, treated as unsigned)
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: exactly one bit clear (complement is a power of two)
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: 2^n-1, positive
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5120
// Float Immediate zero (+0.0f only: the bit-pattern compare excludes -0.0f)
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Half Float Immediate
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (+0.0 only: the bit-pattern compare excludes -0.0)
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5172
// Immediates for special shifts (sign extend)

// Shift counts for sign extension (16 for short-sized, 24 for byte-sized values)
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking (long variant)
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking (long variant)
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5233
// Vector mask register operand
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register: any register in int_reg; also matches the specific
// single-register operands below so instructions taking rRegI accept them.
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}

operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}

operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}

operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}

// Integer register, excluding RAX and RDX
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, excluding RBP and R13
operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5337
// Pointer Register: any register at all (any_reg), including ones
// excluded from ordinary allocation such as R15 and RBP.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// General-purpose pointer register (ptr_reg allocation class)
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP); // See Q&A below about
  match(r15_RegP); // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}

// Narrow (compressed) oop register
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5377
5378 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5379 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5380 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5381 // The output of an instruction is controlled by the allocator, which respects
5382 // register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator when it allocates the instruction's output.
5385 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5386 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5387 // result, RBP is not included in the output of the instruction either.
5388
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a compressed pointer value
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Used in rep stosq
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R15 only (thread-local storage register; see Q&A comment above)
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5478
// General-purpose long register
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Long register, excluding RAX and RDX
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// RAX only
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// RCX only
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// RDX only
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// R11 only
operand r11_RegL()
%{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long register, excluding RBP and R13
operand no_rbp_r13_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5553
// Flags register, used as output of compare instructions
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags as set by unordered FP compares (carry-flag encoding); only used
// when the AVX10.2/APX combination below is NOT available.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(!UseAPX || !VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Counterpart of rFlagsRegUCF for the APX + AVX10.2 case.
operand rFlagsRegUCFE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(UseAPX && VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}

// Float register operands
operand regF() %{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (legacy, i.e. non-extended register set)
operand legRegF() %{
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands (AVX-512 VL register set)
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand regD() %{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands (legacy, i.e. non-extended register set)
operand legRegD() %{
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands (AVX-512 VL register set)
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5645
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index register"
// and base(0x4) means RSP (see the stack-slot operands further down).

// Direct Memory Operand
// operand direct(immP addr)
// %{
//   match(addr);

//   format %{ "[$addr]" %}
//   interface(MEMORY_INTER) %{
//     base(0xFFFFFFFF);
//     index(0x4);
//     scale(0x0);
//     disp($addr);
//   %}
// %}

// Indirect Memory Operand
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5753
// Indirect Memory Times Scale Plus Positive Index Register
// The int index is converted to long; the predicate requires its value
// range to be known non-negative (_lo >= 0), so the zero/sign extension
// distinction does not matter.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{"[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
// The predicate requires the converted index to be known non-negative.
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the converted index to be known non-negative.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5819
// Indirect Narrow Oop Operand
// Decodes a compressed oop on the fly as part of the addressing mode:
// heap base is in R12 and the oop is shifted left by 3 (times_8).
operand indCompressedOop(rRegN reg) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}

// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}

// The *Narrow operands below handle the unscaled-compressed-oops case
// (shift == 0), where DecodeN is a no-op and the narrow oop can be used
// directly as a base register.

// Indirect Memory Operand
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}
5901
// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
// The predicate requires the converted index to be known non-negative.
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// The predicate requires the converted index to be known non-negative.
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
6003
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6076
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed); the hex values are x86 condition-code nibbles.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that don't require any fixup for the unordered case,
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne
operand cmpOpUCF() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  format %{ "" %}
  interface(COND_INTER) %{
    // eq/ne are mapped to parity checks (np/p) here
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6174
6175
6176 // Floating point comparisons that set condition flags to test more directly,
6177 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6178 // are used for L (<) and LE (<=) conditions. It's important to convert these
6179 // latter conditions to ones that use unsigned tests before passing into an
6180 // instruction because the preceding comparison might be based on a three way
6181 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6182 operand cmpOpUCFE()
6183 %{
6184 match(Bool);
6185 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6186 (n->as_Bool()->_test._test == BoolTest::ne ||
6187 n->as_Bool()->_test._test == BoolTest::eq ||
6188 n->as_Bool()->_test._test == BoolTest::lt ||
6189 n->as_Bool()->_test._test == BoolTest::ge ||
6190 n->as_Bool()->_test._test == BoolTest::le ||
6191 n->as_Bool()->_test._test == BoolTest::gt));
6192
6193 format %{ "" %}
6194 interface(COND_INTER) %{
6195 equal(0x4, "e");
6196 not_equal(0x5, "ne");
6197 less(0x2, "b");
6198 greater_equal(0x3, "ae");
6199 less_equal(0x6, "be");
6200 greater(0x7, "a");
6201 overflow(0x0, "o");
6202 no_overflow(0x1, "no");
6203 %}
6204 %}
6205
6206 // Operands for bound floating pointer register arguments
6207 operand rxmm0() %{
6208 constraint(ALLOC_IN_RC(xmm0_reg));
6209 match(VecX);
6210 format%{%}
6211 interface(REG_INTER);
6212 %}
6213
6214 // Vectors
6215
6216 // Dummy generic vector class. Should be used for all vector operands.
6217 // Replaced with vec[SDXYZ] during post-selection pass.
6218 operand vec() %{
6219 constraint(ALLOC_IN_RC(dynamic));
6220 match(VecX);
6221 match(VecY);
6222 match(VecZ);
6223 match(VecS);
6224 match(VecD);
6225
6226 format %{ %}
6227 interface(REG_INTER);
6228 %}
6229
6230 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6231 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6232 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6233 // runtime code generation via reg_class_dynamic.
6234 operand legVec() %{
6235 constraint(ALLOC_IN_RC(dynamic));
6236 match(VecX);
6237 match(VecY);
6238 match(VecZ);
6239 match(VecS);
6240 match(VecD);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6335
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

// All addressing modes an instruction taking "memory" can match.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6349
6350 //----------PIPELINE-----------------------------------------------------------
6351 // Rules which define the behavior of the target architectures pipeline.
6352 pipeline %{
6353
6354 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6362
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6381
6382 //----------PIPELINE CLASSES---------------------------------------------------
6383 // Pipeline Classes describe the stages in which input and output are
6384 // referenced by the hardware pipeline.
6385
6386 // Naming convention: ialu or fpu
6387 // Then: _reg
6388 // Then: _reg if there is a 2nd register
6389 // Then: _long if it's a pair of instructions implementing a long
6390 // Then: _fat if it requires the big decoder
6391 // Or: _mem if it requires the big decoder and a memory unit.
6392
// Integer ALU reg operation (read-modify-write of a single register)
pipe_class ialu_reg(rRegI dst)
%{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}
6402
// Long ALU reg operation (expands to two instructions)
pipe_class ialu_reg_long(rRegL dst)
%{
  instruction_count(2);
  dst : S4(write);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  ALU : S3(2); // both alus
%}
6412
// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst)
%{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  D0 : S0; // big decoder only
  ALU : S3; // any alu
%}
6422
// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}
6432
// Integer ALU operation needing the big decoder (note: the source operand
// here is declared as memory, e.g. for lea-style address arithmetic)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  D0 : S0; // big decoder only
  ALU : S3; // any alu
%}
6442
// Integer ALU reg-mem operation (load feeds the ALU; result one stage later)
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
  single_instruction;
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3; // any mem
%}
6453
// Integer mem operation (prefetch) — memory access only, no ALU result
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem : S3(read);
  D0 : S0; // big decoder only
  MEM : S3; // any mem
%}
6462
// Integer Store to Memory (register source)
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
  single_instruction;
  mem : S3(read);
  src : S5(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}
6473
6474 // // Long Store to Memory
6475 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6476 // %{
6477 // instruction_count(2);
6478 // mem : S3(read);
6479 // src : S5(read);
6480 // D0 : S0(2); // big decoder only; twice
6481 // ALU : S4(2); // any 2 alus
6482 // MEM : S3(2); // Both mems
6483 // %}
6484
// Integer Store of an immediate to Memory
pipe_class ialu_mem_imm(memory mem)
%{
  single_instruction;
  mem : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}
6494
// Integer ALU0 reg-reg operation (e.g. multiply: only ALU0 handles mul)
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  D0 : S0; // Big decoder only
  ALU0 : S3; // only alu0
%}
6504
// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
  single_instruction;
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  ALU0 : S4; // ALU0 only
  MEM : S3; // any mem
%}
6515
// Integer ALU reg-reg compare/test producing condition flags
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}
6526
// Integer ALU reg-imm compare/test producing condition flags
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}
6536
// Integer ALU reg-mem compare/test producing condition flags
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
  single_instruction;
  cr : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}
6548
// Conditional move reg-reg (4-instruction compare/mask idiom)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
  instruction_count(4);
  y : S4(read);
  q : S3(read);
  p : S3(read);
  DECODE : S0(4); // any decoder
%}
6558
// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0; // any decoder
%}
6568
// Conditional move reg-mem
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0; // any decoder
  MEM : S3;
%}
6579
// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  cr : S3(read);
  DECODE : S0(2); // any 2 decoders
%}
6589
// Float reg operation (expands to two instructions)
pipe_class fpu_reg(regD dst)
%{
  instruction_count(2);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}
6598
// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src)
%{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}
6608
// Float reg-reg-reg operation (three operands, three instructions)
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
  instruction_count(3);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0(3); // any 3 decoders
  FPU : S3(2);
%}
6619
// Float four-register operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
  FPU : S3(2);
%}
6631
// Float operation with one memory source and two register sources
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S1(3); // any 3 decoders
  D0 : S0; // Big decoder only
  FPU : S3(2);
  MEM : S3;
%}
6645
// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
  instruction_count(2);
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}
6657
// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
  instruction_count(3);
  dst : S5(write);
  src1 : S3(read);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}
6670
// Float mem-reg operation (store of a float register)
pipe_class fpu_mem_reg(memory mem, regD src)
%{
  instruction_count(2);
  src : S5(read);
  mem : S3(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S1; // big decoder only
  FPU : S4;
  MEM : S3; // any mem
%}
6682
// Float store with two register sources
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  mem : S3(read);
  DECODE : S0(2); // any decoder for FPU PUSH
  D0 : S1; // big decoder only
  FPU : S4;
  MEM : S3; // any mem
%}
6694
// Float store with one register source and one memory source
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  mem : S4(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S0(2); // big decoder only
  FPU : S4;
  MEM : S3(2); // any mem
%}
6706
// Float memory-to-memory operation
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
  instruction_count(2);
  src1 : S3(read);
  dst : S4(read);
  D0 : S0(2); // big decoder only
  MEM : S3(2); // any mem
%}
6715
// Float operation with three memory operands
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
  instruction_count(3);
  src1 : S3(read);
  src2 : S3(read);
  dst : S4(read);
  D0 : S0(3); // big decoder only
  FPU : S4;
  MEM : S3(3); // any mem
%}
6726
// Float store of a register combined with a constant
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
  instruction_count(3);
  src1 : S4(read);
  mem : S4(read);
  DECODE : S0; // any decoder for FPU PUSH
  D0 : S0(2); // big decoder only
  FPU : S4;
  MEM : S3(2); // any mem
%}
6737
// Float load constant
pipe_class fpu_reg_con(regD dst)
%{
  instruction_count(2);
  dst : S5(write);
  D0 : S0; // big decoder only for the load
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}
6748
// Float load constant combined with a register source
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
  instruction_count(3);
  dst : S5(write);
  src : S3(read);
  D0 : S0; // big decoder only for the load
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}
6760
// UnConditional branch
pipe_class pipe_jmp(label labl)
%{
  single_instruction;
  BR : S3;
%}
6767
// Conditional branch (reads flags early, at S1)
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
  single_instruction;
  cr : S1(read);
  BR : S3;
%}
6775
// Allocation idiom (compare-and-exchange; serializes the pipeline)
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE : S0(3);
  D0 : S2;
  MEM : S3;
  ALU : S3(2);
  dst : S5(write);
  BR : S5;
%}
6789
// Generic big/slow expanded idiom — catch-all class for multi-instruction
// expansions; deliberately pessimistic (latency 100, serialized).
pipe_class pipe_slow()
%{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0 : S0(2);
  MEM : S3(2);
%}
6798
// The real do-nothing guy
pipe_class empty()
%{
  instruction_count(0);
%}
6804
// Define the class for the Nop node
define
%{
  MachNop = empty;
%}
6810
6811 %}
6812
6813 //----------INSTRUCTIONS-------------------------------------------------------
6814 //
6815 // match -- States which machine-independent subtree may be replaced
6816 // by this instruction.
6817 // ins_cost -- The estimated cost of this instruction is used by instruction
6818 // selection to identify a minimum cost tree of machine
6819 // instructions that matches a tree of machine-independent
6820 // instructions.
6821 // format -- A string providing the disassembly for this instruction.
6822 // The value of an instruction's operand may be inserted
6823 // by referring to it with a '$' prefix.
6824 // opcode -- Three instruction opcodes may be provided. These are referred
6825 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6827 // indicate the type of machine instruction, while secondary
6828 // and tertiary are often used for prefix options or addressing
6829 // modes.
6830 // ins_encode -- A list of encode classes with parameters. The encode class
6831 // name must have been defined in an 'enc_class' specification
6832 // in the encode section of the architecture description.
6833
6834 // ============================================================================
6835
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    // Emit the stop only if this Halt is actually reachable; the reason
    // string comes from the node's _halt_reason.
    if (is_reachable()) {
      const char* str = __ code_string(_halt_reason);
      __ stop(str);
    }
  %}
  ins_pipe(pipe_slow);
%}
6847
6848 // ============================================================================
6849
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    // Never emitted: eliminated before code generation.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6860
// Load Float (dummy move to a legacy-encodable register; removed post-selection)
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6870
// Load Float (dummy move; removed post-selection)
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6880
// Load Float (dummy move; removed post-selection)
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6890
// Load Double (dummy move; removed post-selection)
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6900
// Load Double (dummy move; removed post-selection)
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6910
// Load Double (dummy move; removed post-selection)
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6920
// Load Double (dummy move; removed post-selection)
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6930
6931 //----------Load/Store/Move Instructions---------------------------------------
6932 //----------Load Instructions--------------------------------------------------
6933
// Load Byte (8 bit signed), sign-extended to 32 bits (movsbl)
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6948
// Load Byte (8 bit signed) into Long Register, sign-extended to 64 bits (movsbq)
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6963
// Load Unsigned Byte (8 bit UNsigned), zero-extended to 32 bits (movzbl)
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# ubyte" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6978
// Load Unsigned Byte (8 bit UNsigned) into Long Register, zero-extended (movzbq)
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6993
// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
// The load zero-extends, so only the low 8 bits of the mask matter.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
          "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
7008
// Load Short (16 bit signed), sign-extended to 32 bits (movswl)
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7023
// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches (x << 24) >> 24 over a short load and folds it into one movsbl.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7035
// Load Short (16 bit signed) into Long Register, sign-extended (movswq)
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7050
// Load Unsigned Short/Char (16 bit UNsigned), zero-extended to 32 bits (movzwl)
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# ushort/char" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7065
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches (x << 24) >> 24 over an unsigned-short load; one movsbl suffices.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7077
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register, zero-extended (movzwq)
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7092
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Masking with 0xFF reduces to a single zero-extending byte load.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7103
// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
// The load zero-extends, so only the low 16 bits of the mask matter.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
          "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7118
// Load Integer (32 bits)
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7133
// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches (x << 24) >> 24 over an int load; folds to one movsbl.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7145
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// (int & 0xFF) over an int load folds to one zero-extending byte load.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7157
// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches (x << 16) >> 16 over an int load; folds to one movswl.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7169
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// (int & 0xFFFF) over an int load folds to one zero-extending word load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7181
// Load Integer into Long Register, sign-extended (movslq)
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7196
// Load Integer with mask 0xFF into Long Register
// Reduces to a single zero-extending byte load.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7207
// Load Integer with mask 0xFFFF into Long Register
// Reduces to a single zero-extending word load.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7218
// Load Integer with a 31-bit mask into Long Register
// A 31-bit mask keeps the sign bit clear, so movl's implicit zero-extension
// of the upper 32 bits is equivalent to ConvI2L here.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
          "andl $dst, $mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7233
// Load Unsigned Integer into Long Register
// movl zero-extends into the upper 32 bits, implementing the 0xFFFFFFFF mask.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl $dst, $mem\t# uint -> long" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
7248
// Load Long (64 bits)
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq $dst, $mem\t# long" %}

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem); // XXX
%}
7263
// Load Range (array length)
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# range" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7276
// Load Pointer
// Only applies when no GC barrier is attached to the load (barrier_data == 0);
// barrier-carrying loads are handled by GC-specific rules elsewhere.
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# ptr" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7290
// Load Compressed Pointer (32-bit narrow oop; barrier-free loads only)
instruct loadN(rRegN dst, memory mem)
%{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7304
7305
// Load Klass Pointer (full-width)
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq $dst, $mem\t# class" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7318
// Load narrow Klass Pointer (non-compact-headers layout: plain 32-bit load)
instruct loadNKlass(rRegN dst, memory mem)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7332
// Load narrow Klass Pointer under compact object headers: the klass bits
// live in the mark word and must be shifted down after loading.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
%{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr); // shrl clobbers the flags
  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    // With APX, a single NDD shift reads memory and shifts in one instruction.
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    }
    else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
7354
// Load Float (32-bit, into an XMM register)
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss $dst, $mem\t# float" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7367
// Load Double — variant for CPUs where clearing the upper XMM half on load
// is not preferred (!UseXmmLoadAndClearUpper).
instruct loadD_partial(regD dst, memory mem)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7381
// Load Double — preferred variant (movsd clears the upper XMM half)
instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7394
// max = java.lang.Math.max(float a, float b)
// AVX10.2 provides a single instruction with Java min/max NaN/-0.0 semantics.
instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxF a b));
  format %{ "maxF $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7405
// max = java.lang.Math.max(float a, float b)
// Non-reduction AVX path using the vector min/max helper with temporaries.
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7417
// Reduction variant of float max (dst may alias an input in a reduction loop).
instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    false /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
7430
// max = java.lang.Math.max(double a, double b)
// AVX10.2 single-instruction path.
instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxD a b));
  format %{ "maxD $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7441
// max = java.lang.Math.max(double a, double b)
// Non-reduction AVX path.
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
  format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7453
// Reduction variant of double max.
instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    false /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
7466
// min = java.lang.Math.min(float a, float b)
// AVX10.2 single-instruction path.
instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinF a b));
  format %{ "minF $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7477
// min = java.lang.Math.min(float a, float b)
// Non-reduction AVX path.
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7489
// Reduction variant of float min.
instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    true /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
7502
// min = java.lang.Math.min(double a, double b)
// AVX10.2 single-instruction path.
instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinD a b));
  format %{ "minD $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7513
7514 // min = java.lang.Math.min(double a, double b)
// AVX fallback double min (no AVX10.2): same shape as minF_reg but with
// T_DOUBLE; three XMM temporaries handle NaN / signed-zero correction.
// Not used inside reductions (see minD_reduction_reg).
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7525
// MinD inside a reduction loop: scalar branch sequence from emit_fp_min_max
// (true /*min*/, false /*single*/ = double); needs an XMM temp and a long
// temp and clobbers the flags (KILL cr).
instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  // Fixed copy-paste: this is the MinD reduction rule, but the format string
  // previously printed "maxD_reduction" in PrintOptoAssembly output.
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    true /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
7538
// Load Effective Address
//
// One rule per pointer addressing-mode operand (offset width, index, scale,
// positive-index variants). Every rule emits a single LEAQ; they differ only
// in which address-expression subtree the matcher folds into the operand.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  // Was "# ptr idxscale" — a copy-paste of leaPIdxScale's format; renamed so
  // PrintOptoAssembly output distinguishes the positive-index variant.
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7635
// Load Effective Address which uses Narrow (32-bits) oop
//
// These rules fold a compressed (narrow) oop into the LEAQ address. The
// *Narrow variants require CompressedOops::shift() == 0, i.e. the narrow
// value needs no decode shift before use as a base; leaPCompressedOopOffset
// instead covers shift() != 0 via the indCompressedOopOffset operand.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7740
// Load a 32-bit integer constant.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load integer zero: xor of a register with itself is cheaper than a mov of
// an immediate, but it writes the condition codes, hence KILL cr.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Load a full 64-bit long constant (10-byte mov64 encoding — hence the cost).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Load long zero: 32-bit xor implicitly clears the upper 32 bits of the
// 64-bit register, so xorl suffices. Flags are clobbered (KILL cr).
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Long constant that fits in an unsigned 32-bit immediate: a 32-bit movl
// zero-extends into the full 64-bit register, avoiding the long encoding.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Long constant that fits in a sign-extended 32-bit immediate (movq imm32).
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7813
// Load a pointer constant; mov64 carries the constant's relocation info so
// the GC / class unloading machinery can patch embedded oops and metadata.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load null pointer via the xor zeroing idiom (clobbers flags).
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Pointer constant that fits in a positive 31-bit immediate: movl
// zero-extends, so no relocation or 64-bit immediate is needed.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7849
// Load a float constant from the per-nmethod constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load a half-float constant from the constant table. Uses the same 32-bit
// movflt as loadConF — assumes the half-float constant occupies a 32-bit
// constant-table slot (TODO confirm against constant-table emission).
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7869
// Load compressed null: xor zeroing idiom. NOTE(review): the format prints
// "$src" but the encoding xors $dst with itself; since $src is the zero
// immediate the printed and emitted semantics agree.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $src\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Load a non-null compressed oop constant; set_narrow_oop records the oop
// relocation. A null constant should have matched loadConN0 above, so
// reaching the null branch here is a matcher bug (ShouldNotReachHere).
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load a compressed klass pointer constant (with klass relocation).
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7911
// Float 0.0: xorps the XMM register with itself — cheaper than a
// constant-table load (cost 100 vs 125 for loadConF).
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Use the same format since predicate() can not be used here.
// Load a double constant from the constant table.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double 0.0: xorpd zeroing idiom, preferred over a constant-table load.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7946
// loadSS* reload spilled values from stack slots into registers. The integer
// forms use the slot's $$Address directly; the FP forms build the address
// explicitly as rsp + displacement.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8007
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// The AllocatePrefetchInstr flag selects exactly one of the four variants
// below (0=NTA, 1=T0, 2=T2, 3=PREFETCHW); the predicates are mutually
// exclusive.

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8058
//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Long
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}

// Store Pointer
// Only matches when the store carries no GC barrier data; barrier-requiring
// stores are matched by other (GC-specific) rules.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
8126
// Store null pointer. When the compressed-oop base is null, r12
// (R12_heapbase) is known to contain zero, so storing the register avoids
// encoding an immediate.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Null Pointer, mark word, or other simple pointer constant.
// immP31 is a positive 31-bit value, so the sign-extended imm32 of movq
// stores the correct 64-bit value.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed null using r12 (zero when the heap base is null).
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store a compressed oop constant; non-null constants need an oop
// relocation (set_narrow_oop), null is stored as a plain zero immediate.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store a compressed klass constant (with klass relocation).
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8222
// Store Integer Immediate
// The *0 variants store zero via r12 (known-zero when the compressed-oop
// base is null), which avoids encoding an immediate operand.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Long Immediate
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Long immediate limited to imm32 (movq sign-extends the 32-bit immediate).
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// 16-bit immediate store, gated on UseStoreImmI16 (some CPUs handle the
// operand-size-prefixed immediate form poorly).
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8301
// Store Byte Immediate
// Store byte zero via r12 (known-zero when the compressed-oop base is null).
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  // Fixed copy-paste in the format string: this rule stores a byte, but it
  // previously printed "# short/char" (copied from storeImmC0).
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8315
// Store an 8-bit immediate byte.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8327
// Store Float
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate Float value (it is faster than store from XMM register)
// Float 0.0 stored via r12 (known-zero when the compressed-oop base is null).
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Any float immediate stored as its 32-bit bit pattern with an integer movl.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Double
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form when the r12 trick is unavailable (complement of storeD0's
// predicate); movq stores zero via a sign-extended imm32.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Double 0.0 stored via r12 (known-zero when the compressed-oop base is null).
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8406
// storeSS* spill register values into stack slots (the inverse of loadSS*).
// The FP forms address the slot explicitly as rsp + displacement.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Address, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}

instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8466
// Cache-line writeback (CacheWB / CLWB-style flush) with pre/post fence
// nodes. All three require hardware data-cache-line-flush support.
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    // The operand must be a plain base register: no index, zero displacement.
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    // true selects the pre-writeback sync variant.
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    // false selects the post-writeback sync variant.
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8507
//----------BSWAP Instructions-------------------------------------------------
// Byte-reversal rules. The int/long forms are a single bswap; the 16-bit
// forms bswap the 32-bit register and then shift the swapped halfword down:
// logical shift for the unsigned (zero-extended) result, arithmetic shift
// for the signed (sign-extended) result.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr); // shift writes the flags

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr); // shift writes the flags

  format %{ "bswapl $dst\n\t"
            "sar $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8554
//---------- Zeros Count Instructions ------------------------------------------
// CLZ: lzcnt when available, otherwise a bsr-based fallback. The fallback
// computes (BitsPerX-1) - bsr(src) as neg + add; for src == 0 (where bsr
// leaves the destination undefined and sets ZF) it loads -1 first, which
// yields 32 (or 64) after the negate-and-add.

instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);     // zero input: -1 -> neg -> +31 gives 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrq(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);     // zero input: -1 -> neg -> +63 gives 64
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
8654
// Count trailing zeros, int flavor.  The TZCNT forms are selected by the
// UseCountTrailingZerosInstruction flag; TZCNT of a zero source yields the
// operand width directly, so no zero special-case is needed there.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Same as above, but folds the int load into the TZCNT memory operand.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Fallback when TZCNT is unavailable.  BSF leaves the destination
// undefined and sets ZF for a zero source, so that case is patched to
// BitsPerInt (32) explicitly.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
8698
// Count trailing zeros, long flavor (result is an int).  Mirrors the int
// forms above: TZCNT when available, otherwise BSF with an explicit
// zero-input patch to BitsPerLong (64).
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-operand variant: folds the long load into TZCNT.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BSF fallback: BSF is undefined (and sets ZF) on zero input, so jump
// over the movl only when a bit was found.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfq(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerLong);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
8742
//--------------- Reverse Operation Instructions ----------------
// Bit-reverse (ReverseI/ReverseL).  Both paths funnel into the shared
// MacroAssembler reverseI/reverseL helpers; the non-GFNI forms pass
// xnoreg for the xmm temp slots (GPR-only sequence), while the GFNI
// forms supply two xmm temps so the helper can use the GFNI
// bit-permute path.  NOTE(review): the exact instruction sequence lives
// in the macro assembler -- confirm temp-register requirements there.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Long variant: the GPR-only path needs two GPR temps; the GFNI path
// needs only one GPR temp (the second temp slot is passed as noreg).
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}
8787
//---------- Population Count Instructions -------------------------------------
// All forms are gated on UsePopCountInstruction and emit a single POPCNT,
// which writes the flags (hence KILL cr).  The long forms produce an int
// result, matching Long.bitCount's return type.

instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-operand variant: folds the int load into POPCNT.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8839
8840
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
//
// On x86 the hardware memory model already orders everything except
// store-then-load, so acquire, release and store-store barriers need no
// code at all (size(0), empty encoding) -- they exist only to constrain
// the compiler's instruction scheduling.  Only the StoreLoad case
// (membar_volatile) emits an instruction.

instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire side of a lock: the preceding CMPXCHG in FastLock already has
// full-fence semantics, so nothing is emitted.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Release side of a lock: the following FastUnlock provides the ordering.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// StoreLoad barrier: the one barrier x86 actually needs.  Emitted via
// MacroAssembler::membar, shown in the format as a locked add to the top
// of the stack (which is a full fence and cheaper than MFENCE).
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the StoreLoad barrier when the matcher can prove a preceding
// instruction (e.g. a locked RMW) already provides it.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8927
8928 //----------Move Instructions--------------------------------------------------
8929
// Reinterpret a long value as a pointer (CastX2P).  A plain register
// move, elided entirely when source and destination coincide.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8942
// Reinterpret an int value as a narrow pointer (CastI2N).  A 32-bit
// register move (movl, which zero-extends into the full register),
// elided entirely when source and destination coincide.
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  // The encoding emits a 32-bit movl, so the format string says movl
  // (it previously claimed movq, which did not match the emitted code).
  format %{ "movl $dst, $src\t# int -> narrow ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8955
// CastP2X applied to a narrow-oop value (operand type rRegN selects this
// form over castP2X below).  Move elided when the registers coincide.
instruct castN2X(rRegL dst, rRegN src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Reinterpret a full-width pointer as a long (CastP2X).
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking
// (the truncating movl is sufficient since only low bits are used).
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With shift == 0 the compressed form already equals the low 32 address
// bits, so a plain movl suffices (no decode needed).
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
9007
// Convert oop pointer into compressed form
// The predicate splits the possibly-null path (must handle null specially)
// from the proven-not-null path, which can use the cheaper
// encode_heap_oop_not_null sequence.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);  // encode_heap_oop works in place, so copy first
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decompress a narrow oop that may be null; Constant pointers are also
// excluded here (they take the not-null form below).
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);  // decode_heap_oop works in place, so copy first
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

// Not-null decode: the macro assembler has a two-register form, so no
// explicit copy is needed when src and dst differ.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}

// Klass pointer compression/decompression (always not-null).
// TEMP dst keeps dst distinct from inputs for the move-and-encode helper.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}
9090
//----------Conditional Move---------------------------------------------------
// Jump
// dummy instruction for generating temp registers
//
// Switch (tableswitch) dispatch: the jump table lives in the constant
// area; $dest (a TEMP) is loaded with the table base and the indirect
// jmp indexes into it with the switch value.
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Scaled-and-offset variant: adds a constant displacement into the table.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Unscaled variant: switch value is already a byte offset into the table.
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
9154
// Conditional move
// CMoveI register forms.  Three families:
//  - imm_01 forms: when the CMove selects between the constants 1 and 0
//    (predicate checks the "false" input is the constant 0), a single
//    SETcc on the negated condition materializes the result -- cheaper
//    than a load + cmov.
//  - plain cmov forms (!UseAPX) vs. ndd forms (UseAPX), which use the
//    APX new-data-destination ecmovl taking two sources.
//  - UCF2 forms: unordered float compares need parity handled, done as a
//    cmovp followed by cmovne.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX new-data-destination form: dst need not alias either input.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NOTE(review): the UCFE forms below carry no UseAPX predicate --
// presumably the rFlagsRegUCFE/cmpOpUCFE operand types only arise when
// APX is in use; confirm against the operand definitions.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// UCF flags carry the same encoding as U here, so just expand to the
// unsigned form.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered-aware "ne": take src if parity (unordered) OR not-equal.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
// (note the swapped (Binary src dst) match and TEMP dst accordingly).
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9312
// Conditional move
// CMoveI memory forms: fold the LoadI into the cmov memory operand.
// Plain cmov (!UseAPX) vs. APX ndd ecmovl (UseAPX) variants, for signed
// and unsigned flags.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// UCF flags share the unsigned encoding; expand to the U form.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// NOTE(review): no UseAPX predicate -- presumably the UCFE operand types
// only arise under APX; confirm against the operand definitions.
instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9387
// Conditional move
// CMoveN (compressed-pointer) forms.  Narrow oops are 32-bit values, so
// these use the 32-bit cmovl/ecmovl just like the int forms; the family
// structure (plain vs. APX ndd, signed/unsigned/UCF/UCF2) mirrors CMoveI.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move ndd
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF flags share the unsigned encoding; expand to the U form.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

// Conditional move ndd
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// NOTE(review): no UseAPX predicate -- presumably the UCFE operand types
// only arise under APX; confirm against the operand definitions.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered-aware "ne": take src if parity (unordered) OR not-equal.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9493
// Conditional move
// CMoveP (full-width pointer) forms: identical structure to CMoveN but
// using the 64-bit cmovq/ecmovq.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF flags share the unsigned encoding; expand to the U form.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// NOTE(review): no UseAPX predicate -- presumably the UCFE operand types
// only arise under APX; confirm against the operand definitions.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered-aware "ne": take src if parity (unordered) OR not-equal.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9599
// Conditional move of long, signed-compare flavor.
// Special case: when the move source is the constant 1 and the CMove's other
// value input is the constant 0, the whole CMove reduces to a setb of the
// negated condition (materializes 0/1 directly, no cmov needed).
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  // Only fires when the second value input of the CMove is constant 0.
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    // setb with the inverted condition writes 0/1 straight into dst.
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Register-register long cmov, legacy two-operand encoding (non-APX).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// APX NDD (new data destination) form: three-operand ecmovq selects between
// src1 and src2 into an independent dst, avoiding a preparatory move.
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Register-memory long cmov: the false-side value is loaded from memory.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

// APX NDD register/memory form of the signed long cmov.
instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9665
// Unsigned-compare variant of the 0/1 CMoveL special case: reduces to setb
// of the negated condition when the other value input is constant 0.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  // Only fires when the second value input of the CMove is constant 0.
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Register-register long cmov on unsigned flags, legacy encoding (non-APX).
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// APX NDD three-operand form of the unsigned long cmov.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// 0/1 special case on unsigned-compare-with-carry flags (UCF).
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  // Only fires when the second value input of the CMove is constant 0.
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// 0/1 special case on the UCFE flags/condition operands.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  // Only fires when the second value input of the CMove is constant 0.
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9733
// UCF register cmov: delegates to the plain unsigned variant via expand.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

// APX NDD cmov on UCFE operands.
// NOTE(review): this rule emits the APX-only ecmovq but carries no
// predicate(UseAPX), unlike the other *_ndd rules — confirm the UCFE operand
// classes are themselves gated on APX, otherwise a predicate seems missing.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Float-compare "not equal": NE is true when unordered (PF set) or when the
// operands differ, hence the cmovp + cmovne pair below.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    // Move on parity (unordered) first, then on not-equal.
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  // Note the swapped (src dst) order relative to the NE rule above.
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9784
// Register-memory long cmov on unsigned flags, legacy encoding (non-APX).
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

// UCF memory cmov: delegates to the plain unsigned memory variant.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

// APX NDD register/memory form of the unsigned long cmov.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// APX NDD register/memory cmov on UCFE operands.
// NOTE(review): emits the APX-only ecmovq without a predicate(UseAPX), unlike
// the other *_ndd rules — confirm the UCFE operand classes are APX-gated.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9831
// Conditional move of float. There is no XMM cmov instruction, so each rule
// emits a short forward branch around a movss instead.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Float cmove on unsigned flags: same branch-around-movss pattern.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF float cmove: delegates to the unsigned variant via expand.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

// UCFE float cmove: same branch-around-movss pattern.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
9894
// Conditional move of double. Like the float rules above, implemented as a
// short forward branch around a movsd (no XMM cmov exists).
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Double cmove on unsigned flags.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF double cmove: delegates to the unsigned variant via expand.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}

// UCFE double cmove: same branch-around-movsd pattern.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
9957
9958 //----------Arithmetic Instructions--------------------------------------------
9959 //----------Addition Instructions----------------------------------------------
9960
// Integer add, register + register, legacy two-operand form (non-APX).
// All add rules kill the flags and advertise which flags they set so the
// matcher can reuse them for a following conditional.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// APX NDD three-operand integer add; either source may be demoted to the
// legacy two-operand form when it aliases dst.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Integer add, register + immediate, legacy form (non-APX).
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// APX NDD integer add, register + immediate (only opr1 can alias dst).
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// APX NDD integer add, memory + immediate into a register destination.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10029
// Integer add, register += memory, legacy form (non-APX).
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD integer add, register + memory into an independent destination.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write integer add directly in memory: mem += reg.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write integer add in memory with an immediate: mem += imm.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);


  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10088
// AddI with constant 1 strength-reduced to incl (gated on UseIncDec, since
// inc/dec only partially update flags on some CPUs).
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD increment: dst = src + 1 with independent destination.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD increment of a value loaded from memory into a register.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// Read-modify-write increment directly in memory.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10142
// XXX why does that use AddI
// AddI with constant -1 strength-reduced to decl (there is no SubI-by-1 rule;
// the ideal graph canonicalizes x - 1 to x + (-1)).
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD decrement: dst = src - 1 with independent destination.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD decrement of a value loaded from memory into a register.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddI
// Read-modify-write decrement directly in memory.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10198
// Integer add/shift combinations folded into a single leal. These rules do
// not touch flags, so there is no KILL cr — lea never writes EFLAGS.
// (index << scale) + disp  =>  leal dst, [index << scale + disp]
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + index + disp  =>  leal dst, [base + index + disp]
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + (index << scale)  =>  leal dst, [base + index << scale]
// base excludes rbp/r13, whose encodings force a displacement byte.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + (index << scale) + disp  =>  full-form leal
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10249
// Long add, register + register, legacy two-operand form (non-APX).
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// APX NDD three-operand long add; either source demotable when it aliases dst.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Long add with a 32-bit sign-extended immediate, legacy form (non-APX).
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// APX NDD long add, register + 32-bit immediate.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// APX NDD long add, memory + 32-bit immediate into a register destination.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10319
// Long add, register += memory, legacy form (non-APX).
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD long add, register + memory into an independent destination.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write long add directly in memory: mem += reg.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write long add in memory with a 32-bit immediate.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10377
// AddL with constant 1 strength-reduced to incq (gated on UseIncDec).
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10390
// APX NDD long increment: dst = src + 1 with an independent destination.
// Fix: src must be in the long register class (rRegL), not rRegI — this is
// an AddL rule encoded with the 64-bit eincq, and the sibling decL_rReg_ndd
// below declares its source as rRegL for the same reason.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  // opr1 (src) may be demoted to the legacy two-operand form when dst == src.
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10404
// APX NDD long increment of a value loaded from memory into a register.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// Read-modify-write long increment directly in memory.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10431
// XXX why does that use AddL
// AddL with constant -1 strength-reduced to decq (the ideal graph
// canonicalizes x - 1 to x + (-1), hence AddL).
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long decrement: dst = src - 1 with independent destination.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long decrement of a value loaded from memory into a register.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddL
// Read-modify-write long decrement directly in memory.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10487
// Long add/shift combinations folded into a single leaq (flag-free, so no
// KILL cr). Mirrors the leaI rules above.
// (index << scale) + disp  =>  leaq dst, [index << scale + disp]
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + index + disp  =>  leaq dst, [base + index + disp]
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + (index << scale)  =>  leaq; base excludes rbp/r13, whose encodings
// require a displacement byte.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

// base + (index << scale) + disp  =>  full-form leaq
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10538
// Pointer add: pointer register plus a long offset register.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a 32-bit sign-extended immediate offset.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// XXX addP mem ops ????
10566
// CheckCastPP exists only for the type system; it generates no code (size 0).
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}

// CastPP is likewise a compile-time-only node; no code emitted.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}

// CastII: zero-size no-op in product mode (VerifyConstraintCasts == 0).
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// Debug variant: emits a runtime range check of dst against the node's
// declared int type when VerifyConstraintCasts is enabled.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10611
10612 instruct castLL(rRegL dst)
10613 %{
10614 predicate(VerifyConstraintCasts == 0);
10615 match(Set dst (CastLL dst));
10616
10617 size(0);
10618 format %{ "# castLL of $dst" %}
10619 ins_encode(/* empty encoding */);
10620 ins_cost(0);
10621 ins_pipe(empty);
10622 %}
10623
10624 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10625 %{
10626 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10627 match(Set dst (CastLL dst));
10628
10629 effect(KILL cr);
10630 format %{ "# cast_checked_LL $dst" %}
10631 ins_encode %{
10632 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10633 %}
10634 ins_pipe(pipe_slow);
10635 %}
10636
10637 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10638 %{
10639 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10640 match(Set dst (CastLL dst));
10641
10642 effect(KILL cr, TEMP tmp);
10643 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10644 ins_encode %{
10645 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10646 %}
10647 ins_pipe(pipe_slow);
10648 %}
10649
10650 instruct castFF(regF dst)
10651 %{
10652 match(Set dst (CastFF dst));
10653
10654 size(0);
10655 format %{ "# castFF of $dst" %}
10656 ins_encode(/* empty encoding */);
10657 ins_cost(0);
10658 ins_pipe(empty);
10659 %}
10660
10661 instruct castHH(regF dst)
10662 %{
10663 match(Set dst (CastHH dst));
10664
10665 size(0);
10666 format %{ "# castHH of $dst" %}
10667 ins_encode(/* empty encoding */);
10668 ins_cost(0);
10669 ins_pipe(empty);
10670 %}
10671
10672 instruct castDD(regD dst)
10673 %{
10674 match(Set dst (CastDD dst));
10675
10676 size(0);
10677 format %{ "# castDD of $dst" %}
10678 ins_encode(/* empty encoding */);
10679 ins_cost(0);
10680 ins_pipe(empty);
10681 %}
10682
10683 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Compare-and-swap family: lock cmpxchg against memory with the expected old
// value implicitly in rax (hence the rax_Reg* operand, which is KILLed because
// cmpxchg overwrites it with the observed value), then setcc materializes the
// boolean success result in res. Each instruct also matches the Weak variant —
// on x86 the strong form is used for both.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  // Only for plain pointers (no GC barrier data); barrier'd forms are matched
  // by GC-specific instructs elsewhere.
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 64-bit long CAS.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit int CAS.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 8-bit byte CAS.
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 16-bit short CAS.
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Narrow-oop (compressed pointer) CAS; 32-bit cmpxchg, barrier-free only.
instruct compareAndSwapN(rRegI res,
                         memory mem_ptr,
                         rax_RegN oldval, rRegN newval,
                         rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10804
// Compare-and-exchange family: like compareAndSwap but the result is the
// value observed in memory, not a success boolean. cmpxchg leaves that value
// in rax, which is exactly the oldval operand, so the instructs simply
// redefine oldval as the result (Set oldval ...) and need no setcc.
instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Narrow-oop exchange; barrier-free only (GC barrier forms matched elsewhere).
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Full-width pointer exchange; barrier-free only.
instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10907
// GetAndAdd family. Three shapes per size:
//   *_reg_no_res / *_imm_no_res: the fetched value is unused
//     (result_not_used()), so a cheaper "lock add" suffices — no xadd, the
//     old value is never produced (dummy result of type Universe).
//   plain xadd*: result is used, so "lock xadd" writes the old value back
//     into the newval register (Set newval ...).
// All forms clobber EFLAGS.

instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

// 16-bit immediate form additionally gated on UseStoreImmI16 (some CPUs
// handle 16-bit immediate operands to memory poorly).
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

// Long immediate form limited to 32-bit sign-extended immediates (immL32);
// x86 has no add-with-64-bit-immediate-to-memory encoding.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11051
// GetAndSet (atomic exchange) family. XCHG with a memory operand is
// implicitly locked on x86, so no lock() prefix is emitted and EFLAGS is
// untouched (no cr operand). The old memory value lands in newval.

instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11078
// Atomic 64-bit exchange: old value of [mem] returned in newval.
// XCHG against memory is implicitly locked, so no lock prefix and no flags
// clobber. Fixed format string: the encoding emits xchgq (64-bit), so the
// disassembly text must read XCHGQ, not XCHGL, consistent with xchgP below.
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11087
// Atomic pointer exchange (64-bit). Barrier-free pointers only; GC-specific
// instructs match when barrier_data() != 0.
instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11097
// Atomic narrow-oop exchange (32-bit xchgl on the compressed pointer).
// Barrier-free only. Fixed format string: "$newval,$mem]" was missing the
// opening bracket; now "[$mem]" to match the sibling xchg instructs.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11107
11108 //----------Abs Instructions-------------------------------------------
11109
11110 // Integer Absolute Instructions
// Integer Absolute Instructions
// Branchless abs: dst = 0 - src, then if that went negative (src was
// positive) cmov copies src back. TEMP dst keeps dst and src in distinct
// registers since dst is written before src is last read.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subl($dst$$Register, $src$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Absolute Instructions
// Same branchless pattern for 64-bit. xorl suffices to zero the full
// register (32-bit ops zero-extend on x86-64).
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11143
11144 //----------Subtraction Instructions-------------------------------------------
11145
11146 // Integer Subtraction Instructions
// Integer subtraction. Legacy two-operand forms are selected when !UseAPX;
// with APX (UseAPX) the three-operand NDD (new data destination) "esubl"
// forms are used instead, with Flag_ndd_demotable_opr* advertising when the
// NDD form may be demoted back to the legacy two-operand encoding.

instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = src1 - src2, demotable when dst == src1.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = src1 - imm.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = [src1] - imm (memory operand is not demotable).
instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Legacy: dst -= [src].
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD: dst = src1 - [src2].
instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD: dst = [src1] - src2.
instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write directly in memory: [dst] -= src.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11261
// Long (64-bit) subtraction: same structure as the int family — legacy
// two-operand subq when !UseAPX, APX NDD esubq forms when UseAPX.
// Immediates are limited to sign-extended 32 bits (immL32).

instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = src1 - src2.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = src1 - imm32.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD: dst = [src1] - imm32.
instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Legacy: dst -= [src].
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD: dst = src1 - [src2].
instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD: dst = [src1] - src2.
instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write directly in memory: [dst] -= src.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11376
11377 // Subtract from a pointer
11378 // XXX hmpf???
// Matches AddP of a negated int (dst + (0 - src)) and emits a single 64-bit
// subtract: dst(ptr) -= src(int).
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11390
// Integer negate. Matched both as (SubI 0 x) and as explicit NegI nodes;
// legacy one-operand negl when !UseAPX, APX NDD enegl when UseAPX.
// Note: the flag() lists here deliberately omit Flag_sets_carry_flag,
// unlike the sub instructs — presumably CF from neg is not to be reused;
// TODO confirm against the flag-optimization code.

instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NDD: dst = -src (src is operand 2 of the SubI, hence demotable_opr2).
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Explicit NegI node, legacy form.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Explicit NegI node, NDD form.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Negate directly in memory: [dst] = -[dst].
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11459
// Long negate matched as (SubL 0 x): legacy negq and APX NDD enegq forms.

instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NDD: dst = -src.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11487
// Explicit NegL node, legacy one-operand form: dst = -dst (64-bit).
// Fixed format comment: this is the long negate (negq), so the disassembly
// annotation now reads "# long" instead of "# int", consistent with
// negL_rReg and negL_mem.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11501
// Explicit NegL node, APX NDD form: dst = -src.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Long negate directly in memory: [dst] = -[dst].
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11528
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register

// Two-operand int multiply: dst = dst * src (legacy encoding, selected when
// APX NDD forms are disabled).
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr); // imul clobbers the condition codes

  ins_cost(300);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// APX NDD (new-data-destination) int multiply: dst = src1 * src2.
// Both source operands are flagged as demotable so the encoder may fall back
// to the shorter two-operand form when dst aliases a source.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Int multiply by immediate using the three-operand imul form:
// dst = src * imm (dst need not equal src, so no APX predicate is needed).
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Int multiply with a memory source operand: dst = dst * mem[src] (legacy).
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350); // memory operand costs more than reg-reg
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// APX NDD int multiply with a memory source: dst = src1 * mem[src2].
instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// Int multiply of a memory operand by an immediate: dst = mem[src] * imm,
// using the three-operand imul (load folded into the multiply).
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// MulAddS2I: dst = dst*src1 + src2*src3, expanded into two multiplies and an
// add built from the existing mulI/addI rules. src2 is consumed as the
// scratch holding the second product (hence KILL src2).
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
            mulI_rReg(src2, src3, cr);
            addI_rReg(dst, src2, cr); %}
%}
11626
// Two-operand long multiply: dst = dst * src (legacy encoding, non-APX).
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// APX NDD long multiply: dst = src1 * src2; both sources demotable to the
// two-operand form when dst aliases one of them.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Long multiply by a 32-bit immediate (imm must fit in 32 bits for the
// three-operand imulq encoding): dst = src * imm.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Long multiply with a memory source: dst = dst * mem[src] (legacy).
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11682
// APX NDD long multiply with a memory source: dst = src1 * mem[src2].
// Format comment normalized to "\t# long ndd" (was " \t# long") for
// consistency with the other NDD multiply rules (eimull/eimulq variants).
instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11697
// Long multiply of a memory operand by a 32-bit immediate:
// dst = mem[src] * imm, with the load folded into the three-operand imulq.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// Signed high-half 64x64 multiply (MulHiL): one-operand imulq produces the
// 128-bit product in RDX:RAX; the high 64 bits land in the fixed dst RDX.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr); // rax supplies one factor and is destroyed

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned high-half 64x64 multiply (UMulHiL): one-operand mulq, high 64
// bits of the product delivered in the fixed dst RDX.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11736
// Signed int divide: rax = rax / div. Per the emitted sequence in the
// format string, the min_int (0x80000000) dividend with divisor -1 is
// special-cased and branches around idivl (that combination is the overflow
// case for idiv). The divisor must avoid rax/rdx, which idivl uses.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr); // cdql/idivl clobber rdx and the flags

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long divide: rax = rax / div, with the min_long / -1 overflow case
// special-cased around idivq (same pattern as divI_rReg).
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int divide: rax = rax u/ div (helper emits the udiv sequence;
// no min-value special case is needed for unsigned division).
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long divide: rax = rax u/ div.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11801
// Integer DIVMOD with Register, both quotient and mod results
// One idivl produces both: quotient in rax, remainder in rdx (both are
// results here, so neither is KILLed).
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}

// Long DIVMOD with Register, both quotient and mod results
// (quotient in rax, remainder in rdx; min_long / -1 branched around idivq).
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}

// Unsigned integer DIVMOD with Register, both quotient and mod results
// (quotient in rax, remainder in rdx; tmp is scratch for the helper).
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
  %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long DIVMOD with Register, both quotient and mod results
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
  %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11876
// Signed int remainder: rdx = rax % div. Same idivl sequence as divI_rReg,
// but the remainder (rdx) is the result and the quotient register rax is
// killed.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long remainder: rdx = rax % div (min_long / -1 branched around
// idivq, yielding remainder 0 via the xorl rdx, rdx path).
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int remainder: rdx = rax u% div.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long remainder: rdx = rax u% div.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11941
// Integer Shift Instructions
// Shift Left by one, two, three
// (immI2 restricts the count so the short shift encodings apply; legacy
// two-operand form, dst is both source and destination.)
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three
// APX NDD form: dst = src << shift without destroying src (src demotable).
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// APX NDD form: dst = src << shift.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD shift-left with a memory source: dst = mem[src] << shift
// (load folded into the esall).
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] << shift.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12026
// Shift Left by variable
// Count is implicitly in CL: the one-argument sall form shifts by CL, and
// the rcx_RegI operand class pins the shift node there.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2()); // BMI2 shlxl rule is preferred when available
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
// Memory-destination variant; count implicitly in CL.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand shift: dst = src << shift. shlxl takes the count in
// any register and does not touch the flags (no KILL cr needed).
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shift with memory source: dst = mem[src] << shift.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12078
// Arithmetic Shift Right by 8-bit immediate
// Legacy two-operand form: dst = dst >> shift (sign-propagating).
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// APX NDD form: dst = src >> shift.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// APX NDD arithmetic shift right with memory source: dst = mem[src] >> shift.
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] >> shift.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12133
// Arithmetic Shift Right by variable
// Count implicitly in CL (one-argument sarl form; rcx_RegI pins the count).
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
// Memory-destination variant; count implicitly in CL.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand arithmetic shift: dst = src >> shift; flags untouched.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 arithmetic shift with memory source: dst = mem[src] >> shift.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12185
// Logical Shift Right by 8-bit immediate
// Legacy two-operand form: dst = dst >>> shift (zero-filling).
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// APX NDD form: dst = src >>> shift.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD logical shift right with memory source: dst = mem[src] >>> shift.
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] >>> shift.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12240
// Logical Shift Right by variable
// Count implicitly in CL (one-argument shrl form; rcx_RegI pins the count).
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
// Memory-destination variant; count implicitly in CL.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand logical shift: dst = src >>> shift; flags untouched.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 logical shift with memory source: dst = mem[src] >>> shift.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12292
// Long Shift Instructions
// Shift Left by one, two, three
// Legacy two-operand form: dst = dst << shift (count limited by immI2).
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three
// APX NDD form: dst = src << shift.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// APX NDD form: dst = src << shift.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long shift-left with memory source: dst = mem[src] << shift.
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] << shift.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12377
// Shift Left by variable
// Count implicitly in CL (one-argument salq form; rcx_RegI pins the count).
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
// Memory-destination variant; count implicitly in CL.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand long shift: dst = src << shift; flags untouched.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 long shift with memory source: dst = mem[src] << shift.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12429
// Arithmetic Shift Right by 8-bit immediate
// Takes a general immI (not immI8) and masks the count to 6 bits in the
// encoding, matching the hardware's 64-bit shift-count behavior.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// APX NDD form: dst = src >> (shift & 0x3F).
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// APX NDD long arithmetic shift right with memory source:
// dst = mem[src] >> (shift & 0x3F).
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] >> (shift & 0x3F).
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
12484
// Arithmetic Shift Right by variable
// Count implicitly in CL (one-argument sarq form; rcx_RegI pins the count).
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
// Memory-destination variant; count implicitly in CL.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand long arithmetic shift: dst = src >> shift; no flags.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 long arithmetic shift with memory source: dst = mem[src] >> shift.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12536
// Logical Shift Right by 8-bit immediate
// Legacy two-operand form: dst = dst >>> shift (zero-filling).
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// APX NDD form: dst = src >>> shift.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long logical shift right with memory source:
// dst = mem[src] >>> shift.
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// Memory-destination form: mem[dst] = mem[dst] >>> shift.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12591
// Logical Shift Right by variable
// Count implicitly in CL (one-argument shrq form; rcx_RegI pins the count).
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
// Memory-destination variant; count implicitly in CL.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand long logical shift: dst = src >>> shift; no flags.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 long logical shift with memory source: dst = mem[src] >>> shift.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12643
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Recognizes (x << 24) >> 24 and collapses it to a single sign-extending
// byte move (movsbl).
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12656
// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Recognizes (x << 16) >> 16 and collapses it to a single sign-extending
// word move (movswl).
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12669
// ROL/ROR instructions

// Rotate left by constant.
// Legacy (pre-BMI2) destructive form; roll clobbers EFLAGS.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate left by constant, BMI2 form.
// There is no "rolx"; a left rotate by n is synthesized as rorx by (32 - n).
// rorx is non-destructive and does not update EFLAGS (no KILL cr).
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31); // complement the count for rorx
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate left by constant, BMI2 form with memory source.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31); // complement the count for rorx
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
// Legacy destructive form; count is implicitly in CL (rcx_RegI operand).
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register); // count in CL
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by variable
// APX NDD form: eroll writes $dst without destroying $src; count in CL.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1); // may demote to legacy roll when dst == src

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12737
// Rotate Right by constant.
// Legacy (pre-BMI2) destructive form; rorl clobbers EFLAGS.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant.
// BMI2 form: rorx takes the count directly, is non-destructive and
// does not update EFLAGS (no KILL cr).
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by constant, BMI2 form with memory source.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
// Legacy destructive form; count is implicitly in CL (rcx_RegI operand).
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register); // count in CL
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by variable
// APX NDD form: erorl writes $dst without destroying $src; count in CL.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1); // may demote to legacy rorl when dst == src

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12802
// Rotate Left by constant.
// Legacy (pre-BMI2) 64-bit destructive form; rolq clobbers EFLAGS.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Left by constant, BMI2 form.
// No "rolx" exists; a left rotate by n is synthesized as rorx by (64 - n).
// rorx is non-destructive and does not update EFLAGS (no KILL cr).
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63); // complement the count for rorx
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by constant, BMI2 form with memory source.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63); // complement the count for rorx
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
// Legacy destructive form; count is implicitly in CL (rcx_RegI operand).
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register); // count in CL
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by variable
// APX NDD form: erolq writes $dst without destroying $src; count in CL.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1); // may demote to legacy rolq when dst == src

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12869
// Rotate Right by constant.
// Legacy (pre-BMI2) 64-bit destructive form; rorq clobbers EFLAGS.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12882
// Rotate Right by constant
// BMI2 form: rorxq is non-destructive and does not update EFLAGS.
// Fenced off under APX (!UseAPX) so that rule selection is consistent with
// rorI_immI8 / rolI_immI8 / rolL_immI8 and the APX NDD rotate rules are
// preferred when available.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12894
// Rotate Right by constant, BMI2 form with memory source.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
// Legacy destructive form; count is implicitly in CL (rcx_RegI operand).
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register); // count in CL
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by variable
// APX NDD form: erorq writes $dst without destroying $src; count in CL.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1); // may demote to legacy rorq when dst == src

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12934
//----------------------------- CompressBits/ExpandBits ------------------------

// CompressBits: BMI2 pext gathers the $src bits selected by $mask into the
// contiguous low-order bits of $dst.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ExpandBits: BMI2 pdep scatters the low-order bits of $src to the bit
// positions selected by $mask in $dst.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// CompressBits with the mask loaded from memory.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// ExpandBits with the mask loaded from memory.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12976
12977
// Logical Instructions

// Integer Logical Instructions

// And Instructions
// And Register with Register
// Legacy two-operand form (dst &= src); the flag(...) list records how
// andl leaves EFLAGS so downstream logic can reuse the result flags.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 & src2; either source may be demoted
// to the legacy two-operand encoding when it aliases dst.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate 255
// (src & 0xFF) is a byte zero-extension: emit movzbl, no flags touched.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 255 and promote to long
// movzbl zero-extends into the full 64-bit register, covering the ConvI2L.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
// (src & 0xFFFF) is a 16-bit zero-extension: emit movzwl.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535 and promote to long
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13061
// Can skip int2long conversions after AND with small bitmask
// For mask == 2^k - 1, (long)(src & mask) equals bzhi(src, k): zero all
// bits at position k and above, which also zero-extends to 64 bits.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1)); // k = log2(mask + 1)
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13076
// And Register with Immediate
// Legacy two-operand form: dst &= constant.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate, APX NDD form: dst = src1 & constant.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// And Memory-load with Immediate, APX NDD form: dst = mem & constant.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
// Legacy two-operand form: dst &= mem.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// And Register with Memory, APX NDD form: dst = src1 & mem.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13150
// And Memory with Register
// Byte-sized read-modify-write: mem = mem & src (low byte).
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Int-sized read-modify-write: mem = mem & src.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
// Read-modify-write: mem = mem & constant.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13194
// BMI1 instructions

// andn: dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 idiom.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// andn, register-register form: dst = ~src1 & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// blsi: extract lowest set bit, dst = (-src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// blsi with memory source: dst = (-mem) & mem.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13253
// blsmsk: mask up to (and including) lowest set bit, dst = (mem - 1) ^ mem.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// blsmsk, register-register form: dst = (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13285
// blsr: reset lowest set bit, dst = (src - 1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-register form: use ialu_reg, consistent with
  // blsiI_rReg_rReg and blsmskI_rReg_rReg (was ialu_reg_mem).
  ins_pipe(ialu_reg);
%}
13301
// blsr with memory source: dst = (mem - 1) & mem.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use ialu_reg_mem, consistent with
  // blsiI_rReg_mem and blsmskI_rReg_mem (was ialu_reg).
  ins_pipe(ialu_reg_mem);
%}
13318
// Or Instructions
// Or Register with Register
// Legacy two-operand form: dst |= src.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 | src2.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Immediate
// Legacy two-operand form: dst |= constant.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Immediate, APX NDD form: dst = src1 | constant.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Commuted operand order (immediate first); OrI is commutative so the
// encoding swaps the operands: dst = src2 | constant.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Or Memory-load with Immediate, APX NDD form: dst = mem | constant.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13406
// Or Register with Memory
// Legacy two-operand form: dst |= mem.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Register with Memory, APX NDD form: dst = src1 | mem.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
// Byte-sized read-modify-write: mem = mem | src (low byte).
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Int-sized read-modify-write: mem = mem | src.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
// Read-modify-write: mem = mem | constant.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13481
// Xor Instructions
// Xor Register with Register
// Legacy two-operand form: dst ^= src.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 ^ src2.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
// (dst ^ -1) is bitwise NOT: emit notl, which declares no flags effect.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst imm));

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// XOR with -1, APX NDD form: dst = ~src.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  match(Set dst (XorI src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1); // may demote to legacy notl when dst == src

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13538
// Xor Register with Immediate
// Legacy two-operand form: dst ^= constant (constant != -1, see predicate).
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate, APX NDD form: dst = src1 ^ constant.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Memory with Immediate
// APX NDD form with memory source: dst = mem ^ constant.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13585
13586 // Xor Register with Memory
// Xor Register with Memory (legacy two-operand form, dst is read-modify-write).
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13601
// XOR int register with memory operand into a third register, APX NDD.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 (LoadI src2)));
  effect(KILL cr);
  // Either operand position may be demoted to the legacy form (XOR is commutative).
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13616
13617 // Xor Memory with Register
// Xor Memory with Register, byte-sized read-modify-write to memory.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13631
// Int read-modify-write XOR directly in memory: mem ^= reg.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13645
13646 // Xor Memory with Immediate
// Xor Memory with Immediate
// Read-modify-write in memory; cheaper (125) than the register-source form
// since no source register is consumed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13660
13661
13662 // Long Logical Instructions
13663
13664 // And Instructions
13665 // And Register with Register
// And Instructions
// And Register with Register (legacy two-operand form).
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13679
13680 // And Register with Register using New Data Destination (NDD)
// And Register with Register using New Data Destination (NDD):
// three-operand form, dst need not alias either source.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
13695
13696 // And Register with Immediate 255
// And Register with Immediate 255
// (long & 0xFF) strength-reduced to a zero-extending byte move; no flags
// are declared killed because movzbl does not touch EFLAGS.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13708
13709 // And Register with Immediate 65535
// And Register with Immediate 65535
// (long & 0xFFFF) strength-reduced to a zero-extending word move.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13721
13722 // And Register with Immediate
// And Register with Immediate (sign-extended 32-bit immediate, legacy form).
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13736
// AND long register with 32-bit immediate into a separate destination, APX NDD.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13750
// AND a loaded long with a 32-bit immediate into a register, APX NDD.
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13764
13765 // And Register with Memory
// And Register with Memory (legacy two-operand form).
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13780
// AND long register with memory operand into a third register, APX NDD.
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13795
13796 // And Memory with Register
// And Memory with Register: long read-modify-write in memory.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13810
13811 // And Memory with Immediate
// And Memory with Immediate: long read-modify-write in memory with imm32.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13825
// Clear one bit in a long in memory: AND with ~(1 << k) becomes BTR (bit
// test and reset) when the cleared bit is above the imm32 range.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // n->in(3) is the AndL node, its in(2) the constant; > 30 means the bit to
  // clear is bit 31 or higher, i.e. ~con is not encodable as sign-extended imm32.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13842
13843 // BMI1 instructions
// BMI1 instructions
// ANDN: dst = ~src1 & mem. Matches the (x ^ -1) & y idiom.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13858
// ANDN register form: dst = ~src1 & src2.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // NOTE(review): reg-reg form reuses the ialu_reg_mem pipe class — confirm intended.
  ins_pipe(ialu_reg_mem);
%}
13872
// BLSI: isolate lowest set bit, dst = (-src) & src.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13886
// BLSI with a memory source: both LoadL uses must be the same location so
// the load can be folded into the instruction.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13901
// BLSMSK: mask up to and including lowest set bit, dst = (src - 1) ^ src,
// with the load folded in.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13917
// BLSMSK register form: dst = (src - 1) ^ src.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13933
// BLSR: reset lowest set bit, dst = (src - 1) & src.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13949
// BLSR with a folded memory load: dst = (mem - 1) & mem.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg);
%}
13966
13967 // Or Instructions
13968 // Or Register with Register
// Or Instructions
// Or Register with Register (legacy two-operand form).
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13982
13983 // Or Register with Register using New Data Destination (NDD)
// Or Register with Register using New Data Destination (NDD):
// three-operand APX form, dst need not alias a source.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
13998
13999 // Use any_RegP to match R15 (TLS register) without spilling.
// Use any_RegP to match R15 (TLS register) without spilling.
// OR a pointer (reinterpreted as a long via CastP2X) into dst.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14011
// NDD variant of orL_rReg_castP2X: OR a CastP2X pointer into a separate
// destination using the APX three-operand encoding.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  // Fix: eorq is an APX NDD instruction, so this rule must only be selected
  // when APX is available — every other *_ndd rule in this file guards on UseAPX.
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14023
14024 // Or Register with Immediate
// Or Register with Immediate (sign-extended imm32, legacy two-operand form).
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14038
// OR long register with imm32 into a separate destination, APX NDD.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14052
// Mirror of orL_rReg_rReg_imm_ndd for the case where the constant appears as
// the first input; OR is commutative, so the encode swaps operands.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    // Note operand order: register operand ($src2) first, then the constant.
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14066
14067 // Or Memory with Immediate
// OR a loaded long with imm32 into a register, APX NDD.
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14081
14082 // Or Register with Memory
// Or Register with Memory (legacy two-operand form).
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14097
// OR long register with memory operand into a third register, APX NDD.
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14112
14113 // Or Memory with Register
// Or Memory with Register: long read-modify-write in memory.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14127
14128 // Or Memory with Immediate
// Or Memory with Immediate: long read-modify-write in memory with imm32.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14142
// Set one bit in a long in memory: OR with (1 << k) becomes BTS (bit test
// and set) when the constant cannot be encoded as a sign-extended imm32.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // n->in(3) is the OrL node, its in(2) the constant; > 31 keeps bits 0..31
  // on the plain orq path.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
14159
14160 // Xor Instructions
14161 // Xor Register with Register
// Xor Instructions
// Xor Register with Register (legacy two-operand form).
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14175
14176 // Xor Register with Register using New Data Destination (NDD)
// Xor Register with Register using New Data Destination (NDD):
// three-operand APX form, dst need not alias a source.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14190
14191 // Xor Register with Immediate -1
// Xor Register with Immediate -1
// Strength-reduced to NOT; no KILL cr since x86 NOT leaves EFLAGS untouched.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
14203
// XOR long with immediate -1 (APX NDD form): strength-reduced to a
// non-destructive NOT that writes $dst while leaving $src intact.
// No KILL cr: x86 NOT does not modify EFLAGS.
// (Cleanup: added the missing space after the comma in the operand list to
// match the formatting of every other rule in this file.)
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  // Demotable to the legacy two-operand encoding when dst == src.
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    __ enotq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
14216
14217 // Xor Register with Immediate
// Xor Register with Immediate (legacy two-operand form).
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14232
// XOR long register with imm32 into a separate destination, APX NDD.
// NOTE(review): name lacks the usual _ndd suffix carried by the sibling
// int rule xorI_rReg_rReg_imm_ndd — consider renaming for consistency.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14247
14248 // Xor Memory with Immediate
// XOR a loaded long with imm32 into a register, APX NDD.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14263
14264 // Xor Register with Memory
// Xor Register with Memory (legacy two-operand form).
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14279
// XOR long register with memory operand into a third register, APX NDD.
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14294
14295 // Xor Memory with Register
// Xor Memory with Register: long read-modify-write in memory.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14309
14310 // Xor Memory with Immediate
// Xor Memory with Immediate: long read-modify-write in memory with imm32.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14324
// CmpLTMask: dst = (p < q) ? -1 : 0. Materializes the comparison as a 0/1
// via setcc, then negates to produce an all-ones/all-zeros mask.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Fix: the format string was missing the "\n\t" separator before "negl",
  // so debug disassembly printed the setcc and negl lines fused together.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14341
// CmpLTMask against zero: dst = (dst < 0) ? -1 : 0, done with a single
// arithmetic shift that smears the sign bit across the register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
14354
14355 /* Better to save a register than avoid a branch */
/* Better to save a register than avoid a branch */
// p = (p - q) + ((p - q < 0) ? y : 0), implemented with sub + conditional add
// via a short forward branch instead of materializing the mask.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    // Skip the add when p - q >= 0 (mask would have been zero).
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
14377
14378 /* Better to save a register than avoid a branch */
/* Better to save a register than avoid a branch */
// y = (p < q) ? y : 0, implemented with a compare plus a conditional clear
// via a short forward branch instead of materializing the mask.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    // p < q: mask is all-ones, so y is left unchanged.
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
14402
14403
14404 //---------- FP Instructions------------------------------------------------
14405
14406 // Really expensive, avoid
// Really expensive, avoid
// Float compare producing full (unordered-aware) flags: after ucomiss,
// emit_cmpfp_fixup rewrites the flags when a NaN was seen (PF set).
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14424
// Float compare without the NaN fixup: cheaper variant for consumers that
// use the UCF flags register class (no fixup of unordered results needed).
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14435
// Float compare using vucomxss for the UCFE flags register class
// (extended compare variant; presumably AVX10-class hardware — see the
// rFlagsRegUCFE register class definition).
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14446
// Float compare against a memory operand (UCF flags, no NaN fixup).
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14457
// Float compare against a memory operand using vucomxss (UCFE flags).
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14468
// Float compare against a constant-table entry (UCF flags, no NaN fixup).
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14479
// Float compare against a constant-table entry using vucomxss (UCFE flags).
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vucomxss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14490
14491 // Really expensive, avoid
// Really expensive, avoid
// Double compare producing full (unordered-aware) flags: after ucomisd,
// emit_cmpfp_fixup rewrites the flags when a NaN was seen (PF set).
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14509
14510 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14511 match(Set cr (CmpD src1 src2));
14512
14513 ins_cost(100);
14514 format %{ "ucomisd $src1, $src2 test" %}
14515 ins_encode %{
14516 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14517 %}
14518 ins_pipe(pipe_slow);
14519 %}
14520
14521 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14522 match(Set cr (CmpD src1 src2));
14523
14524 ins_cost(100);
14525 format %{ "vucomxsd $src1, $src2 test" %}
14526 ins_encode %{
14527 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14528 %}
14529 ins_pipe(pipe_slow);
14530 %}
14531
// Compare double in a register with a double loaded from memory
// (unordered-compare flags variant, no NaN fixup emitted).
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand double compare, vucomxsd variant (rFlagsRegUCFE).
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
  ins_encode %{
    __ vucomxsd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Compare double against a constant from the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Constant-table double compare, vucomxsd variant (rFlagsRegUCFE).
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vucomxsd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14574
14575 // Compare into -1,0,1
// Compare into -1,0,1 (CmpF3). The template shows the contract implemented
// by emit_cmpfp3: start the result at -1; "less" (jb) and "unordered" (jp)
// both keep -1; otherwise setne/movzbl materialize 0 (equal) or 1 (greater).
// Flags are clobbered (KILL cr).
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 — float vs. float loaded from memory.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 — float vs. constant-table constant.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 (CmpD3); same emit_cmpfp3 tail as the float forms.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 — double vs. double loaded from memory.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 — double vs. constant-table constant.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14698
14699 //----------Arithmetic Conversion Instructions---------------------------------
14700
// Convert float to double (register to register).
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert float loaded from memory to double. Only matched when UseAVX == 0
// (presumably because the AVX merging form would create a false dependence
// on $dst's previous contents — TODO confirm).
instruct convF2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert double to float (register to register).
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert double loaded from memory to float; UseAVX == 0 only, like
// convF2D_reg_mem above.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14746
14747 // XXX do mem variants
// Convert float to int, legacy path (no AVX10.2). convertF2I emits the
// truncating convert plus whatever fixup Java semantics require
// (presumably handling the NaN/out-of-range indefinite result — confirm
// against MacroAssembler::convertF2I); it clobbers flags, hence KILL cr.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX10.2 path: a single saturating convert, no fixup and no flag kill
// (note the absence of an effect clause compared with the legacy form).
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX10.2 path with the float load folded into the convert.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Convert float to long, legacy path (shared convertF2I helper, T_LONG).
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Convert float to long, AVX10.2 saturating convert (quadword form).
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float-to-long, AVX10.2, load folded.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Convert double to int, legacy path.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double-to-int, AVX10.2 saturating convert.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double-to-int, AVX10.2, load folded.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Convert double to long, legacy path.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double-to-long, AVX10.2 saturating convert.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Double-to-long, AVX10.2, load folded.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14883
// RoundD: double rounded to a long in a GP register. Needs two scratch
// registers; rcx is pinned explicitly (presumably because round_double uses
// a variable-count shift, which requires cl — TODO confirm against
// MacroAssembler::round_double). Flags are clobbered.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// RoundF: float rounded to an int in a GP register; same scratch-register
// arrangement as round_double_reg above.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14905
// Convert int to float via cvtsi2ssl. Under AVX the destination is zeroed
// first (presumably to break the false dependence the merging scalar
// convert would otherwise carry on $dst's previous value — TODO confirm).
// Only matched when the UseXmmI2F shortcut (convXI2F_reg) is disabled.
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-to-float with the load folded; UseAVX == 0 only.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert int to double via cvtsi2sdl; same AVX pxor guard as convI2F_reg_reg.
instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Int-to-double with the load folded; UseAVX == 0 only.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14959
14960 instruct convXI2F_reg(regF dst, rRegI src)
14961 %{
14962 predicate(UseXmmI2F);
14963 match(Set dst (ConvI2F src));
14964
14965 format %{ "movdl $dst, $src\n\t"
14966 "cvtdq2psl $dst, $dst\t# i2f" %}
14967 ins_encode %{
14968 __ movdl($dst$$XMMRegister, $src$$Register);
14969 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14970 %}
14971 ins_pipe(pipe_slow); // XXX
14972 %}
14973
14974 instruct convXI2D_reg(regD dst, rRegI src)
14975 %{
14976 predicate(UseXmmI2D);
14977 match(Set dst (ConvI2D src));
14978
14979 format %{ "movdl $dst, $src\n\t"
14980 "cvtdq2pdl $dst, $dst\t# i2d" %}
14981 ins_encode %{
14982 __ movdl($dst$$XMMRegister, $src$$Register);
14983 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14984 %}
14985 ins_pipe(pipe_slow); // XXX
14986 %}
14987
// Convert long to float via cvtsi2ssq; under AVX, $dst is zeroed first
// (same false-dependence precaution as convI2F_reg_reg).
instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Long-to-float with the load folded; UseAVX == 0 only.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert long to double via cvtsi2sdq; same AVX pxor guard.
instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Long-to-double with the load folded; UseAVX == 0 only.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15039
// Sign-extend int to long with a single movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15051
15052 // Zero-extend convert int to long
15053 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15054 %{
15055 match(Set dst (AndL (ConvI2L src) mask));
15056
15057 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15058 ins_encode %{
15059 if ($dst$$reg != $src$$reg) {
15060 __ movl($dst$$Register, $src$$Register);
15061 }
15062 %}
15063 ins_pipe(ialu_reg_reg);
15064 %}
15065
15066 // Zero-extend convert int to long
15067 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15068 %{
15069 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15070
15071 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15072 ins_encode %{
15073 __ movl($dst$$Register, $src$$Address);
15074 %}
15075 ins_pipe(ialu_reg_mem);
15076 %}
15077
// Zero the upper 32 bits of a long (AndL with 0xFFFFFFFF). The movl is
// emitted unconditionally — even when dst == src — because the source long
// may have nonzero upper bits that the 32-bit write must clear.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Truncate long to int: a plain 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15099
15100
// Bit-level move (no conversion): reinterpret a float spilled on the stack
// as an int in a GP register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Bit-level move: reinterpret an int stack slot as a float in an XMM reg.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

// Bit-level move: reinterpret a double stack slot as a long in a GP reg.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    __ movq($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Bit-level move: long stack slot into an XMM reg as a double.
// Note: this and MoveL2D_stack_reg below have identical encodings — movdbl
// itself selects movsd vs movlpd based on UseXmmLoadAndClearUpper — so the
// two instructs differ only in the predicate and the printed mnemonic.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

// UseXmmLoadAndClearUpper variant of the above (movsd form in the listing).
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
15162
15163
// Bit-level move: store a float register into an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Bit-level move: store an int register into a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15187
15188 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15189 match(Set dst (MoveD2L src));
15190 effect(DEF dst, USE src);
15191
15192 ins_cost(95); // XXX
15193 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15194 ins_encode %{
15195 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15196 %}
15197 ins_pipe(pipe_slow);
15198 %}
15199
// Bit-level move: store a long register into a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    __ movq(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Register-to-register bit moves between the XMM and GP files (no memory
// round-trip): movdl moves 32 bits, movdq moves 64 bits.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double bits XMM -> GP long register.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    __ movdq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Int bits GP -> XMM float register.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long bits GP -> XMM double register.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15255
15256
15257 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray, small non-constant length, UseAVX <= 2, general (not
// word-copy-only) clears. cnt arrives in rcx (length in 8-byte words — the
// template's "shlq rcx,3" converts it to bytes for rep stosb), base in rdi,
// and the 64-bit fill value in rax so rep stos can use it directly. All
// three registers are destroyed (USE_KILL). The actual code selection
// between rep-stosb / XMM loop / rep-stosq happens inside clear_mem; the
// $$template below only mirrors it for disassembly.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = false (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
15317
// ClearArray, small non-constant length, UseAVX <= 2, word-copy-only
// variant: same shape as rep_stos above, but the rep-stosb byte path is
// absent from the template and clear_mem is called with word_copy_only=true
// (the fill must proceed in full words).
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = true (matches the predicate).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
15373
15374 // Small non-constant length ClearArray for AVX512 targets.
// ClearArray, small non-constant length, AVX512 (UseAVX > 2) targets:
// additionally reserves a legacy-encodable XMM temp and a mask register
// for clear_mem's AVX512 path.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // NOTE(review): the leading "xorq rax, rax" template line looks stale —
  // rax arrives carrying $val (USE_KILL val) and clear_mem is handed $val
  // directly, and the non-EVEX variants above show no such line. Confirm
  // against clear_mem and drop it from the format if so.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = false; ktmp supplied for the
    // AVX512 masked-store path inside clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15434
// ClearArray, small non-constant length, AVX512 targets, word-copy-only
// variant (clear_mem called with word_copy_only=true).
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // NOTE(review): as in rep_stos_evex, the "xorq rax, rax" template line
  // looks stale given rax carries $val — confirm against clear_mem.
  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large = false, word_copy_only = true.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15494
15495 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray, large non-constant length, UseAVX <= 2: no short-length
// pre-check in the template — the node is known large (is_large()), so
// clear_mem goes straight to the bulk path (is_large=true).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = false.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
15544
// Large non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2)
// when only word-sized stores are permitted (word_copy_only()).  Note that
// this variant has no UseFastStosb branch in its format: byte-granular
// "rep stosb" is not an option on this path.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Debug-only textual rendering of the code emitted by clear_mem() below.
  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = true; no mask register (non-EVEX path).
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
15590
// Large non-constant length ClearArray for AVX512 targets (UseAVX > 2),
// byte-granular clearing allowed (!word_copy_only()).  Same register contract
// as the non-EVEX variant but additionally reserves an opmask scratch (ktmp)
// that clear_mem() can use for masked stores.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Debug-only textual rendering of the code emitted by clear_mem() below.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = false; ktmp supplied for EVEX masked ops.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15641
// Large non-constant length ClearArray for AVX512 targets (UseAVX > 2) when
// only word-sized stores are permitted (word_copy_only()).  Mirrors
// rep_stos_large_evex but passes word_copy_only = true to clear_mem().
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Debug-only textual rendering of the code emitted by clear_mem() below.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // is_large = true, word_copy_only = true; ktmp supplied for EVEX masked ops.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15691
// Small constant length ClearArray for AVX512 targets.
// Here cnt is a compile-time immediate (immL), so clear_mem() is called with
// $cnt$$constant rather than a register.  Requires AVX512VL with
// MaxVectorSize >= 32, and !is_large() && !word_copy_only() on the node.
// Low ins_cost(100) makes this form preferred over the register variants
// when it applies.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((MaxVectorSize >= 32) && VM_Version::supports_avx512vl()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15706
// StrComp, Latin-1 vs Latin-1 (encoding LL), for targets WITHOUT AVX512VLBW.
// Passes knoreg since no opmask register is available on this path.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15722
// StrComp, Latin-1 vs Latin-1 (encoding LL), for AVX512VLBW targets.
// Same as string_compareL but supplies an opmask scratch register (ktmp).
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15738
// StrComp, UTF-16 vs UTF-16 (encoding UU), for targets WITHOUT AVX512VLBW.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15754
// StrComp, UTF-16 vs UTF-16 (encoding UU), for AVX512VLBW targets (uses ktmp).
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15770
// StrComp, Latin-1 vs UTF-16 (encoding LU), for targets WITHOUT AVX512VLBW.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15786
// StrComp, Latin-1 vs UTF-16 (encoding LU), for AVX512VLBW targets (uses ktmp).
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15802
// StrComp, UTF-16 vs Latin-1 (encoding UL), for targets WITHOUT AVX512VLBW.
// Note the operand-register assignment is swapped relative to the other
// variants (str1 in rsi, str2 in rdi), and string_compare() is called with
// (str2, str1) / (cnt2, cnt1) so the UL helper sees operands in its expected
// order.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15818
// StrComp, UTF-16 vs Latin-1 (encoding UL), for AVX512VLBW targets.
// Like string_compareUL, arguments are passed to string_compare() swapped
// ((str2, str1) / (cnt2, cnt1)); additionally supplies an opmask scratch.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15834
// Fast search of a substring with a compile-time constant size (immI int_cnt2),
// Latin-1/Latin-1 (encoding LL).  Constant substrings of >= 16 elements use
// string_indexofC8() (no stack staging needed); smaller ones fall back to
// string_indexof(), which may stage the substring through the stack when it
// crosses a page boundary.  Requires SSE4.2.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15863
// Fast search of a substring with a compile-time constant size, UTF-16/UTF-16
// (encoding UU).  The C8 threshold is 8 elements here (chars, i.e. 16 bytes)
// versus 16 in the Latin-1 variant.  Requires SSE4.2.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
15892
// Fast search of a substring with a compile-time constant size, UTF-16 source /
// Latin-1 pattern (encoding UL).  Same 8-element C8 threshold as the UU
// variant.  Requires SSE4.2.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15921
// StrIndexOf with a runtime (non-constant) substring length, Latin-1/Latin-1.
// Passes -1 as the constant count to indicate "length is in cnt2".
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
15938
// StrIndexOf with a runtime substring length, UTF-16/UTF-16 (encoding UU).
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15955
// StrIndexOf with a runtime substring length, UTF-16 source / Latin-1 pattern
// (encoding UL).
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15972
// StrIndexOfChar on a UTF-16 string (encoding U).  Requires SSE4.2; uses
// three XMM scratch registers plus one GPR scratch.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15986
// StrIndexOfChar on a Latin-1 string (encoding L).  Mirrors
// string_indexof_char but dispatches to the Latin-1 helper.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16000
// Fast string equals for targets WITHOUT AVX512VLBW.  Delegates to
// arrays_equals() with is_array_equ = false (operands are raw pointers plus an
// explicit count, not array oops) and is_char = false.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16017
// Fast string equals for AVX512VLBW targets; same as string_equals but with
// an opmask scratch register (ktmp).
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16033
// Fast array equals for byte[] (encoding LL), non-AVX512VLBW targets.
// Delegates to arrays_equals() with is_array_equ = true (operands are array
// oops; the helper derives the length, using tmp3 as scratch for it).
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16050
// Fast array equals for byte[] (encoding LL), AVX512VLBW targets (uses ktmp).
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16066
// Fast array equals for char[] (encoding UU), non-AVX512VLBW targets
// (is_char = true in the arrays_equals() call).
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16082
// Fast array equals for char[] (encoding UU), AVX512VLBW targets (uses ktmp).
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16098
// Vectorized array hash code (VectorizedHashCode node) for AVX2+ targets.
// basic_type is an immediate selecting the element type, forwarded to the
// helper as a BasicType.  result is both an input (initial hash) and the
// output, per the match rule (Binary result basic_type).  Uses thirteen XMM
// scratch registers and three GPR scratches; consumes ary1 and cnt1.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}
16124
// Implements the CountPositives node on a byte array, for targets lacking
// either AVX512VLBW or BMI2.  Both mask-register arguments are passed as
// knoreg since no opmask registers are usable on this path.
// Fix: removed a stray trailing comma after the last operand (rFlagsReg cr,)
// in the instruct parameter list.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16140
// Implements the CountPositives node on a byte array for targets with both
// AVX512VLBW and BMI2; supplies two opmask scratch registers to the helper.
// Fix: removed a stray trailing comma after the last operand (rFlagsReg cr,)
// in the instruct parameter list, matching the non-EVEX variant.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16156
// Fast char[] to byte[] compression (StrCompressedCopy), for targets lacking
// either AVX512VLBW or BMI2.  Passes knoreg for both mask registers.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16174
// Fast char[] to byte[] compression for AVX512VLBW + BMI2 targets; supplies
// two opmask scratch registers to char_array_compress().
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Fast byte[] to char[] inflation (StrInflatedCopy), for targets lacking
// either AVX512VLBW or BMI2.  Produces no value (Universe dummy result).
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
16205
// Fast byte[] to char[] inflation for AVX512VLBW + BMI2 targets; supplies an
// opmask scratch register (ktmp) to byte_array_inflate().
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
16219
// encode char[] to byte[] in ISO_8859_1
// Both variants below lower EncodeISOArray via MacroAssembler::encode_iso_array;
// they differ only in the node's is_ascii() flag, forwarded as the final bool.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  // Non-ASCII (full ISO-8859-1) flavor of the node.
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  // ASCII-only flavor: same helper, ascii=true.
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
16253
16254 //----------Overflow Math Instructions-----------------------------------------
16255
// Overflow-checked arithmetic: each rule performs the operation so the
// Overflow* node's consumer can branch on the resulting OF/flags. The
// Add/Mul (register) forms pin op1 to rax and destroy it (USE_KILL);
// Sub forms only need a compare since they produce no result.

// int add, reg+reg: addl leaves overflow state in flags; rax operand is consumed.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// int add, reg+imm variant.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// long add, reg+reg.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// long add, reg+imm32 (sign-extended immediate).
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// int sub overflow check: cmp sets the same flags as sub without writing op1.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow check (0 - x): negl actually mutates op2, so it is
// pinned to rax and killed.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Multiply overflow checks: imul sets OF on overflow. The immediate forms
// use the 3-operand imul into a TEMP so the sources stay live.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16421
16422
16423 //----------Control Flow Instructions------------------------------------------
16424 // Signed compare Instructions
16425
16426 // XXX more variants!!
// Signed 32-bit compares producing rFlagsReg, plus test-against-zero forms
// that fold CmpI-with-0 (and CmpI(AndI ...) 0) into testl.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Compare against a memory operand (folds the LoadI).
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// CmpI x 0 => testl x, x (shorter encoding than cmpl x, 0).
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// (src & con) == 0 folded into a single testl.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16505
16506 // Unsigned compare Instructions; really, same as signed except they
16507 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compares: identical machine instructions to the signed
// forms, but typed as rFlagsRegU so only unsigned branch conditions consume them.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// CmpU x 0 => testl (valid: eq/neq are the same signed or unsigned).
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16552
// Pointer compares (CmpP) — unsigned flags; memory forms require a GC
// barrier-free load (barrier_data() == 0).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  // reloc() == none restricts this to non-relocatable (raw) pointers.
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  // Not applicable when the heap base is 0 (see testP_mem_reg0 below).
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // imm32 is sign-extended to 64 bits by testq, matching the format text.
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Null check of an in-memory oop when r12 is known to hold zero
// (compressed oops with a null heap base keep r12 == 0).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16635
// Narrow (compressed) oop/klass compares — 32-bit operations on rRegN.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Compare against a narrow-oop constant; cmp_narrow_oop records the
// necessary oop relocation for the embedded immediate.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  // Disabled under compact object headers, where the narrow klass is not a
  // directly comparable in-memory field.
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow-oop null checks, register and memory forms.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Cheaper form when r12 (heapbase) is known to be zero.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16735
16736 // Yanked all unsigned pointer compare operations.
16737 // Pointer compares are done with CmpP which is already unsigned.
16738
// Signed 64-bit compares and test-against-zero forms (mirror the CmpI family).
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// (src & con) == 0 folded into testq.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Same as above with the register operand arriving via CastP2X
// (pointer bits tested as a long).
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16826
16827 // Manifest a CmpU result in an integer register. Very painful.
16828 // This is the test to avoid.
// Materialize the three-way unsigned compare CmpU3 into an int register:
// dst = -1 if src1 <u src2, 0 if equal, 1 if src1 >u src2.
// Sequence: unsigned cmp; preload -1; take it on 'below'; otherwise setcc
// on ZF yields 0 (equal) or 1 (above).
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // Fixed format comment: this rule lowers CmpU3 (the "# CmpL3" text was a
  // copy-paste from the signed-long variant).
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16850
16851 // Manifest a CmpL result in an integer register. Very painful.
16852 // This is the test to avoid.
// Materialize the three-way signed long compare CmpL3 into an int register:
// dst = -1 if src1 < src2, 0 if equal, 1 if src1 > src2 (signed 'less'
// short-circuits to -1; setcc on ZF produces 0/1 otherwise).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16874
16875 // Manifest a CmpUL result in an integer register. Very painful.
16876 // This is the test to avoid.
// Materialize the three-way unsigned long compare CmpUL3 into an int
// register: dst = -1 if src1 <u src2, 0 if equal, 1 if src1 >u src2.
// Same shape as cmpL3_reg_reg, but uses the unsigned 'below' condition.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // Fixed format comment: this rule lowers CmpUL3 (the "# CmpL3" text was a
  // copy-paste from the signed variant).
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16898
16899 // Unsigned long compare Instructions; really, same as signed long except they
16900 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 64-bit compares — same instructions as signed, typed rFlagsRegU.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// CmpUL x 0 => testq (eq/neq are sign-agnostic).
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16944
// Byte-sized compare/test against memory, folding the byte load.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// (LoadUB & imm) == 0 => testb; imm restricted to 7 bits (immU7) so the
// unsigned-byte masking stays within the byte operand.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Signed-byte version: full 8-bit immediate allowed.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16974
16975 //----------Max and Min--------------------------------------------------------
16976 // Min Instructions
16977
// MinI/MaxI lowering: a compare followed by a conditional move, built from
// expand rules. The cmov helpers have no match rule — they exist solely to
// be instantiated by the min/max expands below. The *_ndd forms use APX
// new-data-destination (3-operand) cmov when UseAPX is on.

// min helper: dst = (dst > src) ? src kept via cmovgt into dst.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// MinI dst src => compare, then cmov-greater replaces dst with the smaller.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}

// max helper: cmov-less keeps the larger value in dst.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
17081
17082 // ============================================================================
17083 // Branch Instructions
17084
17085 // Jump Direct - Label defines a relative address from JMP+1
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional goto; always emitted as a long (rel32) jump so the fixed
// size(5) holds regardless of displacement.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch; same encoding as jmpCon.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unsigned-carry-flag variant; lower cost than jmpConU so the matcher
// prefers it where the UCF flags type applies.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// eq/ne conditional jump on UCF2 flags: the parity flag (set by unordered
// FP compares) must be folded in — ne also branches on parity; eq first
// skips past the branch when parity is set.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      // Only eq/ne are legal for cmpOpUCF2.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using signed and unsigned comparison
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
17208
17209 // ============================================================================
17210 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
17211 // superklass array for an instance of the superklass. Set a hidden
17212 // internal cache on a hit (cache is checked with exposed code in
17213 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
17214 // encoding ALSO sets flags.
17215
// Legacy (linear-scan) slow-path subtype check, used when the secondary
// supers hash table is disabled. Scans the secondary-supers array with
// repne scasq; result (rdi) ends up zero on a hit, nonzero on a miss,
// and flags are set accordingly.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Hit path falls through here: zero the result before the miss label.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
17249
17250 // ============================================================================
17251 // Two versions of hashtable-based partialSubtypeCheck, both used when
17252 // we need to search for a super class in the secondary supers array.
17253 // The first is used when we don't know _a priori_ the class being
17254 // searched for. The second, far more common, is used when we do know:
17255 // this is used for instanceof, checkcast, and any case where C2 can
17256 // determine it by constant propagation.
17257
// Hash-table based check for a superclass that is NOT known at compile
// time (the rarer of the two table-based forms; see comment above).
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    // The whole lookup is delegated to the macro assembler; four temps
    // are required for the hashed probe of the secondary-supers table.
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17276
// Hash-table based check for a compile-time-constant superclass (the
// common case: instanceof/checkcast where C2 knows the type).
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700); // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    // Knowing the superclass lets us precompute its hash slot at
    // compile time instead of deriving it at runtime.
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
    } else {
      // Out-of-line variant: call the per-slot stub rather than
      // inlining the table probe at every check site.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    }
  %}

  ins_pipe(pipe_slow);
%}
17301
17302 // ============================================================================
17303 // Branch Instructions -- short offset versions
17304 //
17305 // These instructions are used to replace jumps of a long offset (the default
17306 // match) with jumps of a shorter offset. These instructions are all tagged
17307 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17308 // match rules in general matching. Instead, the ADLC generates a conversion
17309 // method in the MachNode which can be used to do in-place replacement of the
17310 // long variant with the shorter variant. The compiler will determine if a
17311 // branch can be taken by the is_short_branch_offset() predicate in the machine
17312 // specific code section of the file.
17313
17314 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump, short (rel8) form; substituted for the long form
// by the short-branch replacement pass when the target is in range.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s $labl" %}
  size(2); // opcode + rel8 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1); // marks this as the short replacement variant
%}
17329
17330 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional jump on integer flags, short (rel8) form.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  size(2); // opcode + rel8 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17345
17346 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back branch, short (rel8) form; identical encoding to
// jmpCon_short but matches CountedLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17361
17362 // Jump Direct Conditional - using unsigned comparison
// Conditional jump using unsigned comparison flags, short form.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17377
// Conditional jump on unordered-compare flags (float compares) where a
// single Jcc suffices; short form.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17392
// Short form for float compares that need TWO branches: after an
// unordered compare PF is set, so the parity outcome must be routed
// separately from the eq/ne outcome.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4); // two 2-byte short jumps
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // ne: an unordered result (PF=1) counts as "not equal", so both
      // branches target the label.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // eq: an unordered result must NOT be treated as equal, so the
      // parity branch skips over the equality jump.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17426
17427 // Jump Direct Conditional - using signed and unsigned comparison
// Conditional jump using combined signed/unsigned comparison flags,
// short form.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17442
17443 // ============================================================================
17444 // inlined locking and unlocking
17445
// Inlined monitor-enter fast path.  Produces only condition codes
// (Set cr ...): the caller branches to the slow path on failure.
// The box (rbx) is consumed and clobbered; rax and tmp are scratch.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17456
// Inlined monitor-exit fast path; like cmpFastLock, only the flags are
// the result and the slow path is taken on failure.  rax is consumed.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17467
17468
17469 // ============================================================================
17470 // Safepoint Instructions
// Safepoint poll: a load from the polling page whose address is held in
// $poll.  The loaded value is discarded (rax is only a dummy
// destination); only the memory access matters, since the VM arms the
// poll by protecting the page.  The poll relocation lets the VM
// recognize this instruction when a trap occurs.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    // The relocation must cover the test instruction itself, hence it
    // is emitted first and verified against pre_pc below.
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17487
// Broadcast a 64-bit GPR into an AVX-512 opmask register (MaskAll).
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    // mask_len is the vector length of this node, which determines how
    // many mask bits are significant.
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17497
// MaskAll from a 32-bit source when more than 32 mask bits are needed:
// the int is first sign-extended into a 64-bit temp so all mask bits
// are populated.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register); // sign-extend int -> long
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17510
17511 // ============================================================================
17512 // Procedure Call/Return Instructions
17513 // Call Java Static Instruction
17514 // Note: If this code changes, the corresponding ret_addr_offset() and
17515 // compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  // clear_avx resets AVX state before calling out; call_epilog emits
  // the post-call bookkeeping (encoding classes defined elsewhere in
  // this file).
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4); // see the ret_addr_offset()/compute_padding() note above
%}
17527
17528 // Call Java Dynamic Instruction
17529 // Note: If this code changes, the corresponding ret_addr_offset() and
17530 // compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // The inline-cache convention: rax is preloaded with non_oop_word as
  // the initial cached klass before the call (see format below).
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17543
17544 // Call Runtime Instruction
// Call into the VM runtime (may reach a safepoint).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17555
17556 // Call runtime without safepoint
// Leaf runtime call: no safepoint, no oop-map needed.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17567
17568 // Call runtime without safepoint and with vector arguments
// Leaf runtime call with vector arguments.  Note: unlike the other
// call forms this deliberately omits clear_avx — presumably because
// the vector arguments live in the SIMD registers and must survive
// until the call; confirm against the callers if modifying.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17579
17580 // Call runtime without safepoint
17581 // entry point is null, target holds the address to call
// Indirect leaf call: used when the node carries no static entry point
// (entry_point() == nullptr) and the target address is in a register.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
    __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17595
17596 // Call runtime without safepoint
// Direct leaf call (no safepoint, no FP arguments); the complementary
// predicate to CallLeafNoFPInDirect above.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17608
17609 // Return Instruction
17610 // Remove the return address & jump to it.
17611 // Notice: We always emit a nop after a ret to make sure there is room
17612 // for safepoint patching
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // ret with no stack adjustment; see the comment above about the
    // trailing nop for safepoint patching.
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17623
17624 // Tail Call; Jump from runtime stub to Java code.
17625 // Also known as an 'interprocedural jump'.
17626 // Target of jump will eventually return to caller.
17627 // TailJump below removes the return address.
17628 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17629 // emitted just above the TailCall which has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  // method_ptr is pinned to rbx by its operand class; the callee
  // expects the Method* there.
  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17641
17642 // Tail Jump; remove the return address; jump to target.
17643 // TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_encode %{
    // Pop the return address into rdx before jumping; the exception
    // oop stays in rax (pinned by the ex_oop operand class).
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17657
17658 // Forward exception.
// Forward exception: tail-jump (not call) into the shared
// forward-exception stub.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17669
17670 // Create exception oop: created by stack-crawling runtime code.
17671 // Created exception is now available to this handler, and is setup
17672 // just prior to jumping to this handler. No code emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  // Zero-size: this node only informs the register allocator that the
  // incoming exception oop is already in rax.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17683
17684 // Rethrow exception:
17685 // The exception oop will come in the first argument position.
17686 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // Tail-jump (not call) into the rethrow stub; the exception oop is
  // already in the first argument register per the comment above.
  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17698
17699 // ============================================================================
17700 // This name is KNOWN by the ADLC and cannot be changed.
17701 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  // Zero-size: the current JavaThread* permanently lives in r15, so no
  // instruction is needed — only a register-allocator binding.
  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17712
// ---- AddF: scalar single-precision float add ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the add (dst = dst + [src]).
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: add a float constant materialized in the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form, dst = src1 + src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the add.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17783
// ---- AddD: scalar double-precision float add ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst + src.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the add.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: add a double constant from the constant table.
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the add.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17854
// ---- SubF: scalar single-precision float subtract ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst - src.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the subtract.
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: subtract a float constant from the constant table.
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form, dst = src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the subtract.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17925
// ---- SubD: scalar double-precision float subtract ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst - src.
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the subtract.
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: subtract a double constant from the constant table.
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the subtract.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17996
// ---- MulF: scalar single-precision float multiply ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst * src.
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the multiply.
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: multiply by a float constant from the constant table.
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the multiply.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18067
// ---- MulD: scalar double-precision float multiply ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst * src.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the multiply.
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: multiply by a double constant from the constant table.
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the multiply.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18138
// ---- DivF: scalar single-precision float divide ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst / src.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the divide.
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: divide by a float constant from the constant table.
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form, dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the divide.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18209
// ---- DivD: scalar double-precision float divide ----

// SSE (UseAVX == 0): destructive two-operand form, dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: the load is folded into the divide.
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: divide by a double constant from the constant table.
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (UseAVX > 0): non-destructive three-operand form.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: memory operand folded into the divide.
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX: constant-table operand.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18280
// ---- Scalar AbsF / AbsD ----
// Absolute value is implemented by AND-ing with a sign-bit mask loaded from a
// constant in memory (0x7fffffff for float, 0x7fffffffffffffff for double).
// SSE forms are destructive (dst is also the input); AVX forms read src.

// SSE: clear the sign bit of the float in dst.
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: dst = src with the sign bit cleared (128-bit encoding).
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// SSE: clear the sign bit of the double in dst.
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: dst = src with the sign bit cleared (128-bit encoding).
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
18330
// ---- Scalar NegF / NegD ----
// Negation is implemented by XOR-ing with a sign-flip mask loaded from a
// constant in memory (0x80000000 for float, 0x8000000000000000 for double).
// SSE forms are destructive; AVX forms go through the macro-assembler
// vnegatess/vnegatesd helpers.

// SSE: flip the sign bit of the float in dst.
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: dst = src with the sign bit flipped.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// SSE: flip the sign bit of the double in dst.
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX: dst = src with the sign bit flipped.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18378
// ---- Scalar SqrtF / SqrtD ----

// sqrtss instruction needs destination register to be pre initialized for best performance
// (sqrtss writes only the low 32 bits, leaving a dependency on dst's old upper bits).
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// sqrtsd instruction needs destination register to be pre initialized for best performance
// (same partial-write consideration as sqrtss above, for the low 64 bits).
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18400
// ---- Float <-> half-precision (FP16) conversions ----
// Scalar conversions go through macro-assembler helpers; vector conversions
// map directly to vcvtps2ph / vcvtph2ps. The 0x04 immediate on vcvtps2ph
// selects rounding per MXCSR.RC (imm8 bit 2 set) — see the Intel SDM.

// Scalar ConvF2HF: float in XMM -> 16-bit half in a GPR; $tmp is XMM scratch.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar ConvF2HF folded into a 16-bit store (StoreC). A k-mask of 0x1 makes
// the masked evcvtps2ph write only the single low element to memory.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Vector float -> half cast, register destination.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector float -> half cast stored straight to memory; only for stores of
// 16 bytes or more (smaller results would need a partial store).
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar ConvHF2F: 16-bit half in a GPR -> float in XMM.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Vector half -> float cast with the source loaded directly from memory.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector half -> float cast, register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18475
18476 // ---------------------------------------- VectorReinterpret ------------------------------------
18477 instruct reinterpret_mask(kReg dst) %{
18478 predicate(n->bottom_type()->isa_vectmask() &&
18479 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18480 match(Set dst (VectorReinterpret dst));
18481 ins_cost(125);
18482 format %{ "vector_reinterpret $dst\t!" %}
18483 ins_encode %{
18484 // empty
18485 %}
18486 ins_pipe( pipe_slow );
18487 %}
18488
18489 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18490 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18491 n->bottom_type()->isa_vectmask() &&
18492 n->in(1)->bottom_type()->isa_vectmask() &&
18493 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18494 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18495 match(Set dst (VectorReinterpret src));
18496 effect(TEMP xtmp);
18497 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18498 ins_encode %{
18499 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18500 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18501 assert(src_sz == dst_sz , "src and dst size mismatch");
18502 int vlen_enc = vector_length_encoding(src_sz);
18503 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18504 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18505 %}
18506 ins_pipe( pipe_slow );
18507 %}
18508
18509 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18510 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18511 n->bottom_type()->isa_vectmask() &&
18512 n->in(1)->bottom_type()->isa_vectmask() &&
18513 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18514 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18515 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18516 match(Set dst (VectorReinterpret src));
18517 effect(TEMP xtmp);
18518 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18519 ins_encode %{
18520 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18521 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18522 assert(src_sz == dst_sz , "src and dst size mismatch");
18523 int vlen_enc = vector_length_encoding(src_sz);
18524 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18525 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18526 %}
18527 ins_pipe( pipe_slow );
18528 %}
18529
18530 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18531 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18532 n->bottom_type()->isa_vectmask() &&
18533 n->in(1)->bottom_type()->isa_vectmask() &&
18534 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18535 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18536 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18537 match(Set dst (VectorReinterpret src));
18538 effect(TEMP xtmp);
18539 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18540 ins_encode %{
18541 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18542 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18543 assert(src_sz == dst_sz , "src and dst size mismatch");
18544 int vlen_enc = vector_length_encoding(src_sz);
18545 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18546 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18547 %}
18548 ins_pipe( pipe_slow );
18549 %}
18550
// ---- Vector (non-mask) VectorReinterpret rules ----

// Same byte-size reinterpret: pure no-op, the bits in dst are already correct.
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// SSE expand (src smaller than dst): load a constant lane mask covering the
// source width into dst, then AND with src so the source bytes are kept and
// all higher bytes of dst are zeroed.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX expand from a 4-byte source: one vpand against the 32-bit lane mask
// keeps the low 4 bytes of src and zeroes the rest of dst.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}


// AVX expand from an 8/16/32-byte source: a move sized to the source width
// copies the payload; the untouched upper bytes of dst are zeroed by the
// move's semantics (movq / VEX-encoded moves zero the upper bits).
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case 8:  __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// Shrink (src larger than dst): copy only the destination-width low bytes.
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this)) {
      case 4:  __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
      case 8:  __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18637
18638 // ----------------------------------------------------------------------------------------------------
18639
18640 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18641 match(Set dst (RoundDoubleMode src rmode));
18642 format %{ "roundsd $dst,$src" %}
18643 ins_cost(150);
18644 ins_encode %{
18645 assert(UseSSE >= 4, "required");
18646 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18647 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18648 }
18649 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18650 %}
18651 ins_pipe(pipe_slow);
18652 %}
18653
18654 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18655 match(Set dst (RoundDoubleMode con rmode));
18656 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18657 ins_cost(150);
18658 ins_encode %{
18659 assert(UseSSE >= 4, "required");
18660 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18661 %}
18662 ins_pipe(pipe_slow);
18663 %}
18664
18665 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18666 predicate(Matcher::vector_length(n) < 8);
18667 match(Set dst (RoundDoubleModeV src rmode));
18668 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18669 ins_encode %{
18670 assert(UseAVX > 0, "required");
18671 int vlen_enc = vector_length_encoding(this);
18672 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18673 %}
18674 ins_pipe( pipe_slow );
18675 %}
18676
18677 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18678 predicate(Matcher::vector_length(n) == 8);
18679 match(Set dst (RoundDoubleModeV src rmode));
18680 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18681 ins_encode %{
18682 assert(UseAVX > 2, "required");
18683 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18684 %}
18685 ins_pipe( pipe_slow );
18686 %}
18687
18688 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18689 predicate(Matcher::vector_length(n) < 8);
18690 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18691 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18692 ins_encode %{
18693 assert(UseAVX > 0, "required");
18694 int vlen_enc = vector_length_encoding(this);
18695 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18696 %}
18697 ins_pipe( pipe_slow );
18698 %}
18699
18700 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18701 predicate(Matcher::vector_length(n) == 8);
18702 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18703 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18704 ins_encode %{
18705 assert(UseAVX > 2, "required");
18706 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18707 %}
18708 ins_pipe( pipe_slow );
18709 %}
18710
// OnSpinWait (Thread.onSpinWait intrinsic): emits a pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
// Fused multiply-add, double: the result is produced into the c operand
// (match sets c, so the addend register is also the destination).
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD  c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
// Fused multiply-add, float: same destructive-in-c pattern as fmaD_reg.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF  c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18748
// ====================VECTOR INSTRUCTIONS=====================================

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// They exist only so the matcher can connect the vec and legVec register
// classes; if one survives to code emission that is a compiler bug.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Companion to MoveVec2Leg for the opposite direction (legVec -> vec).
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18769
18770 // ============================================================================
18771
18772 // Load vectors generic operand pattern
18773 instruct loadV(vec dst, memory mem) %{
18774 match(Set dst (LoadVector mem));
18775 ins_cost(125);
18776 format %{ "load_vector $dst,$mem" %}
18777 ins_encode %{
18778 BasicType bt = Matcher::vector_element_basic_type(this);
18779 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18780 %}
18781 ins_pipe( pipe_slow );
18782 %}
18783
18784 // Store vectors generic operand pattern.
18785 instruct storeV(memory mem, vec src) %{
18786 match(Set mem (StoreVector mem src));
18787 ins_cost(145);
18788 format %{ "store_vector $mem,$src\n\t" %}
18789 ins_encode %{
18790 switch (Matcher::vector_length_in_bytes(this, $src)) {
18791 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18792 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18793 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18794 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18795 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18796 default: ShouldNotReachHere();
18797 }
18798 %}
18799 ins_pipe( pipe_slow );
18800 %}
18801
// ---------------------------------------- Gather ------------------------------------

// Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE

// AVX2-style gather (no AVX512VL) for non-subword elements up to 32 bytes.
// The all-ones vector mask is required because AVX2 gathers are always
// masked; the hardware updates the mask as elements complete, hence TEMP.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // vpcmpeqd of a register with itself yields all-ones: a full mask.
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// AVX-512 gather for non-subword elements. kxnor of a k-register with
// itself produces an all-ones opmask (gather everything).
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked AVX-512 gather for non-subword elements. dst is zeroed first so
// masked-off lanes are deterministic.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18860
// ---- Unmasked subword (byte/short) gathers ----
// There is no hardware subword gather, so these delegate to macro-assembler
// helpers that assemble the result element by element.

// Subword gather producing at most 8 result bytes.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Subword gather producing more than 8 result bytes. idx_base is copied to
// idx_base_temp because the helper advances its index pointer; the noreg
// arguments are the (unused here) mask registers of the shared helper.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18892
// ---- Masked subword gathers, AVX-512 (opmask) variants ----
// The k-mask is first transferred to a GPR (kmovql) because the element-wise
// helpers consume the mask as an integer bitset; mask_idx starts at zero and
// serves as the helper's running index.

// Masked subword gather, result of at most 8 bytes, AVX512BW mask source.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subword gather, result larger than 8 bytes, AVX512BW mask source.
// idx_base is copied to idx_base_temp because the helper advances it.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18928
// ---- Masked subword gathers, AVX2 (vector-mask) variants ----
// The vector mask is collapsed to a GPR bitset with vpmovmskb (one bit per
// byte). For T_SHORT each lane contributes two identical byte bits, so pext
// with 0x55555555 compresses the even bits into one bit per short lane.
// mask_idx is then cleared for use as the helper's running index.

// Masked subword gather, result of at most 8 bytes, AVX2 vector mask.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subword gather, result larger than 8 bytes, AVX2 vector mask.
// idx_base is copied to idx_base_temp because the helper advances it.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18972
// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

// Unmasked AVX-512 scatter: an all-ones opmask is loaded from a constant so
// every element is stored.
// NOTE(review): the format text mentions "k2"; the opmask temp actually used
// is $ktmp.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked AVX-512 scatter: only lanes with a set mask bit are stored.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19013
19014 // ====================REPLICATE=======================================
19015
19016 // Replicate byte scalar to be vector
// Replicate byte scalar to be vector
// Dispatches on available ISA level:
//   - AVX-512 (BW, and VL for <512-bit): direct GPR broadcast (evpbroadcastb)
//   - AVX2: move to XMM then vpbroadcastb
//   - SSE: unpack/shuffle sequence, widening to 128 bits when vlen == 16
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // SSE fallback: duplicate the byte into a word, splat the word across
      // the low 64 bits, then (if needed) copy low qword to high qword.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19045
// Replicate a byte loaded from memory across all vector lanes (AVX2+ only,
// since vpbroadcastb with a memory operand requires AVX2).
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19056
19057 // ====================ReplicateS=======================================
19058
// Replicate short (2 byte) scalar to be vector. Same ISA dispatch structure
// as vReplB_reg, using word-granularity broadcasts/shuffles.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // SSE fallback: splat the word across the low 64 bits, then widen to
      // 128 bits when the vector holds 8 shorts.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19086
// Replicate a half-float (FP16) immediate across all lanes. The 16-bit
// constant payload is staged in a GPR and broadcast with evpbroadcastw.
// NOTE(review): gated at encode time by the assert (avx512_fp16 + T_SHORT);
// presumably the immH operand only arises when FP16 is supported — confirm.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19100
// Replicate a half-float (FP16) scalar held in an XMM register: move the
// 16-bit payload to a GPR (vmovw), then broadcast it word-wise.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19113
// Replicate a short loaded from memory across all vector lanes (AVX2+ only,
// since vpbroadcastw with a memory operand requires AVX2).
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19124
19125 // ====================ReplicateI=======================================
19126
// Replicate int scalar to be vector. ISA dispatch:
//   - AVX-512 (or 512-bit vector): direct GPR broadcast (evpbroadcastd)
//   - AVX2: move to XMM then vpbroadcastd
//   - SSE: movdl + pshufd splat
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
19146
// Replicate an int loaded from memory. AVX2 uses the integer broadcast;
// plain AVX falls back to vbroadcastss (bit-identical for a 32-bit load);
// SSE loads then splats with pshufd.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
19164
19165 instruct ReplI_imm(vec dst, immI con) %{
19166 predicate(Matcher::is_non_long_integral_vector(n));
19167 match(Set dst (Replicate con));
19168 format %{ "replicateI $dst,$con" %}
19169 ins_encode %{
19170 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
19171 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
19172 type2aelembytes(Matcher::vector_element_basic_type(this))));
19173 BasicType bt = Matcher::vector_element_basic_type(this);
19174 int vlen = Matcher::vector_length_in_bytes(this);
19175 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
19176 %}
19177 ins_pipe( pipe_slow );
19178 %}
19179
19180 // Replicate scalar zero to be vector
// Replicate scalar zero to be vector
// Zeroing idiom: on EVEX-without-VL targets the 128-bit pxor encoding is not
// available for the upper registers, so use the EVEX vpxor with the full
// vector-length encoding; otherwise the short legacy pxor suffices (it
// implicitly zeroes the upper bits on AVX-capable hardware).
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19195
// Replicate the all-ones immediate (-1) for byte/short/int vectors with a
// single "set all bits" idiom (vallones).
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19206
19207 // ====================ReplicateL=======================================
19208
19209 // Replicate long (8 byte) scalar to be vector
// Replicate long (8 byte) scalar to be vector. ISA dispatch mirrors
// ReplI_reg, with quadword moves/broadcasts.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // SSE: duplicate the low quadword into both halves of the XMM register.
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19229
// Replicate a long loaded from memory: vpbroadcastq on AVX2, movddup on SSE3
// (bit-identical for a 64-bit load), movq + punpcklqdq on plain SSE2.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19247
19248 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// One copy suffices on SSE3+ (movddup/broadcast can widen); two copies are
// stored when only a plain 128-bit load is available.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
19260
// Replicate zero for long vectors; same zeroing idiom as ReplI_zero
// (EVEX vpxor where AVX512VL is absent, short legacy pxor otherwise).
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19275
// Replicate the all-ones long immediate (-1) with the "set all bits" idiom.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19286
19287 // ====================ReplicateF=======================================
19288
// Replicate a float scalar (in an XMM register) with AVX:
//   - <=4 lanes: in-lane shuffle (vpermilps) is enough
//   - AVX2: reg-to-reg vbroadcastss
//   - plain AVX, 8 lanes: shuffle the low lane, then mirror it into the
//     high 128 bits.
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19308
// SSE-only float replicate: splat lane 0 across the 128-bit register.
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
19318
// Replicate a float loaded from memory (vbroadcastss with a memory operand
// is available from plain AVX onwards).
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19329
19330 // Replicate float scalar immediate to be vector by loading from const table.
// Replicate float scalar immediate to be vector by loading from const table.
// The number of copies stored matches the minimal load granule available
// (1 with AVX broadcast, 2 with SSE3 movddup, 4 with a plain 128-bit load).
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
        VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
19343
// Replicate float zero. Uses the FP xorps where legal; on EVEX targets
// lacking AVX512VL+DQ the EVEX-encoded integer vpxor is emitted instead.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19358
19359 // ====================ReplicateD=======================================
19360
19361 // Replicate double (8 bytes) scalar to be vector
// Replicate double (8 bytes) scalar to be vector
//   - 2 lanes: movddup duplicates the low qword in-register (SSE3)
//   - AVX2: reg-to-reg vbroadcastsd
//   - plain AVX, 4 lanes: movddup then mirror into the high 128 bits
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19381
// Pre-SSE3 double replicate: pshufd with imm 0x44 copies dwords {1,0} into
// both qword halves, i.e. duplicates the low 64 bits.
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
19391
// Replicate a double loaded from memory: vbroadcastsd for 256/512-bit
// vectors, movddup (SSE3) for the 2-lane case.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}
19406
19407 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// One copy suffices on SSE3+ (movddup can widen); two copies otherwise.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
19419
// Replicate double zero. xorps is used (rather than xorpd) where legal —
// same effect, shorter encoding; EVEX-without-VL/DQ targets use vpxor.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19434
19435 // ====================VECTOR INSERT=======================================
19436
// Insert a GPR value into one lane of an integral vector of <32 bytes
// (fits a single 128-bit lane operation), in place in $dst.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // insert() dispatches to pinsrb/pinsrw/pinsrd based on elem_bt.
    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19454
19455 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19456 predicate(Matcher::vector_length_in_bytes(n) == 32);
19457 match(Set dst (VectorInsert (Binary src val) idx));
19458 effect(TEMP vtmp);
19459 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19460 ins_encode %{
19461 int vlen_enc = Assembler::AVX_256bit;
19462 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19463 int elem_per_lane = 16/type2aelembytes(elem_bt);
19464 int log2epr = log2(elem_per_lane);
19465
19466 assert(is_integral_type(elem_bt), "sanity");
19467 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19468
19469 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19470 uint y_idx = ($idx$$constant >> log2epr) & 1;
19471 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19472 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19473 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19474 %}
19475 ins_pipe( pipe_slow );
19476 %}
19477
// Insert a GPR value into one lane of a 64-byte (AVX-512) integral vector:
// extract the addressed 128-bit lane (one of four), insert within it, and
// write the lane back.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element within the 128-bit lane; y_idx: which of the 4 lanes.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19501
// Insert a long into a 2-lane vector in place: pinsrq addresses the lane
// directly, no cross-lane extract needed.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19515
19516 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19517 predicate(Matcher::vector_length(n) == 4);
19518 match(Set dst (VectorInsert (Binary src val) idx));
19519 effect(TEMP vtmp);
19520 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19521 ins_encode %{
19522 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19523 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19524
19525 uint x_idx = $idx$$constant & right_n_bits(1);
19526 uint y_idx = ($idx$$constant >> 1) & 1;
19527 int vlen_enc = Assembler::AVX_256bit;
19528 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19529 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19530 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19531 %}
19532 ins_pipe( pipe_slow );
19533 %}
19534
// Insert a long into an 8-lane (512-bit) vector: extract the addressed
// 128-bit lane (one of four), pinsrq within it, write the lane back.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: qword within the 128-bit lane; y_idx: which of the 4 lanes.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19552
// Insert a float into a <8-lane vector in place. insertps encodes the
// destination lane in bits [5:4] of the immediate, hence x_idx << 4.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}
19568
19569 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19570 predicate(Matcher::vector_length(n) >= 8);
19571 match(Set dst (VectorInsert (Binary src val) idx));
19572 effect(TEMP vtmp);
19573 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19574 ins_encode %{
19575 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19576 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19577
19578 int vlen = Matcher::vector_length(this);
19579 uint x_idx = $idx$$constant & right_n_bits(2);
19580 if (vlen == 8) {
19581 uint y_idx = ($idx$$constant >> 2) & 1;
19582 int vlen_enc = Assembler::AVX_256bit;
19583 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19584 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19585 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19586 } else {
19587 assert(vlen == 16, "sanity");
19588 uint y_idx = ($idx$$constant >> 2) & 3;
19589 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19590 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19591 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19592 }
19593 %}
19594 ins_pipe( pipe_slow );
19595 %}
19596
// Insert a double into a 2-lane vector in place: route the 64-bit payload
// through a GPR so pinsrq can place it into the addressed lane.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19612
19613 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19614 predicate(Matcher::vector_length(n) == 4);
19615 match(Set dst (VectorInsert (Binary src val) idx));
19616 effect(TEMP vtmp, TEMP tmp);
19617 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19618 ins_encode %{
19619 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19620 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19621
19622 uint x_idx = $idx$$constant & right_n_bits(1);
19623 uint y_idx = ($idx$$constant >> 1) & 1;
19624 int vlen_enc = Assembler::AVX_256bit;
19625 __ movq($tmp$$Register, $val$$XMMRegister);
19626 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19627 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19628 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19629 %}
19630 ins_pipe( pipe_slow );
19631 %}
19632
19633 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19634 predicate(Matcher::vector_length(n) == 8);
19635 match(Set dst (VectorInsert (Binary src val) idx));
19636 effect(TEMP tmp, TEMP vtmp);
19637 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19638 ins_encode %{
19639 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19640 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19641
19642 uint x_idx = $idx$$constant & right_n_bits(1);
19643 uint y_idx = ($idx$$constant >> 1) & 3;
19644 __ movq($tmp$$Register, $val$$XMMRegister);
19645 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19646 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19647 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19648 %}
19649 ins_pipe( pipe_slow );
19650 %}
19651
19652 // ====================REDUCTION ARITHMETIC=======================================
19653
19654 // =======================Int Reduction==========================================
19655
// Cross-lane reduction of an int vector combined with scalar $src1
// (add/mul/and/or/xor/min/max); the actual op is dispatched on the node's
// ideal opcode inside MacroAssembler::reduceI.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19674
19675 // =======================Long Reduction==========================================
19676
// Cross-lane reduction of a long vector combined with scalar $src1, for
// targets WITHOUT AVX512DQ (legVec operands keep the allocator to registers
// usable with legacy encodings).
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19695
// Long reduction variant for targets WITH AVX512DQ; unrestricted vec
// operands let the allocator use the full register file.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19714
19715 // =======================Float Reduction==========================================
19716
// Strictly-ordered float add/mul reduction for <=4 lanes. $dst carries the
// accumulated scalar in and out (hence TEMP dst + the dst operand in the
// match), preserving Java's left-to-right FP semantics.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19730
// Strictly-ordered float add/mul reduction for 8 lanes (256-bit); needs a
// second vector TEMP for the cross-lane folding sequence.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19744
// Strictly-ordered float add/mul reduction for 16 lanes (512-bit); legVec
// operands restrict allocation to legacy-encodable registers.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19758
19759
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // NOTE(review): the encode body never reads $src1 — presumably the identity
  // is folded in by unordered_reduce_fp or is a no-op for add/mul; confirm.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19776
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: the encoding passes only $src2 (plus scratch $vtmp) to the macro
  // assembler; src1 is matched but not read here.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19793
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: the encoding passes only $src2 (plus scratch $vtmp1/$vtmp2) to the
  // macro assembler; src1 is matched but not read here.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19810
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 16-element form: vector operands use the legVec register class (contrast
  // with the vec class of the smaller-length rules). src1 is matched but not
  // read by the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19827
19828 // =======================Double Reduction==========================================
19829
// Strictly-ordered add/mul reduction over a 2-element double vector:
// dst is both the scalar accumulator on entry and the result; vtmp is
// clobbered as scratch.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19843
// Strictly-ordered add/mul reduction over a 4-element double vector:
// dst is both the scalar accumulator on entry and the result; vtmp1/vtmp2
// are clobbered as scratch.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19857
// Strictly-ordered add/mul reduction over an 8-element double vector.
// Same shape as reduction4D but operands use the legVec register class.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19871
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: the encoding passes only $src2 to the macro assembler; src1 is
  // matched but not read here.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19888
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: the encoding passes only $src2 (plus scratch $vtmp) to the macro
  // assembler; src1 is matched but not read here.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19905
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 8-element form: vector operands use the legVec register class. src1 is
  // matched but not read by the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19922
19923 // =======================Byte Reduction==========================================
19924
// Byte-element add/and/or/xor/min/max reduction into a GPR when AVX512BW is
// NOT available (legVec operands). Both the scalar input src1 and the vector
// src2 are passed to reduceB. Note: MulReductionVI is deliberately absent —
// byte multiply reduction has its own rules (mul_reductionB / mul_reduction64B).
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19942
// Byte-element add/and/or/xor/min/max reduction into a GPR when AVX512BW IS
// available. Identical to reductionB except the operands use the vec register
// class instead of legVec; the encoding is the same reduceB call.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19960
19961 // =======================Short Reduction==========================================
19962
// Short-element reduction into a GPR. Unlike the byte rules this one also
// matches MulReductionVI directly (no separate short-multiply rule).
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19981
// =======================Byte Mul Reduction==========================================
19983
// Byte-element multiply reduction for vectors of up to 32 elements
// (vec register class); the 64-element case has its own legVec rule below.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19997
// Byte-element multiply reduction for 64-element vectors; same encoding as
// mul_reductionB but the operands use the legVec register class.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20011
20012 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction, 2-element vector, pre-AVX10.2 path. Matches only
// when the scalar input is the operation's identity (+inf for min, -inf for
// max), so the encoding can ignore src1 and reduce src2 alone. Condition
// flags are clobbered (KILL cr).
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20034
// Float min/max reduction, vectors of 4+ elements, pre-AVX10.2 path.
// As with the 2-element rule, the scalar input must be the identity value,
// so only src2 is reduced; needs one extra temp (xmm_0) versus the 2F rule.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20055
// Accumulator variant of minmax_reduction2F: dst is also the first input
// (MinReductionV/MaxReductionV dst src), signalled to the macro assembler by
// the 'true' merge flag. No identity-value restriction in the predicate.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20074
20075
// Accumulator variant of minmax_reductionF (4+ elements): dst is also the
// first input; the 'true' flag tells the macro assembler to fold dst in.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20094
// AVX10.2 variant of minmax_reduction2F: needs only one temp; the unused
// temp slots of reduceFloatMinMax are passed as xnoreg. No flags kill.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20112
// AVX10.2 variant of minmax_reductionF (4+ elements): two temps; unused
// temp slots are passed as xnoreg. No flags kill.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20130
// AVX10.2 accumulator variant, 2-element float: dst is also the first input
// ('true' merge flag); unused temp slots are passed as xnoreg.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20146
20147 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
20148 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
20149 Matcher::vector_length(n->in(2)) >= 4);
20150 match(Set dst (MinReductionV dst src));
20151 match(Set dst (MaxReductionV dst src));
20152 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20153 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
20154 ins_encode %{
20155 int opcode = this->ideal_Opcode();
20156 int vlen = Matcher::vector_length(this, $src);
20157 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
20158 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20159 %}
20160 ins_pipe( pipe_slow );
20161 %}
20162
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction, 2-element vector, pre-AVX10.2 path. Matches only
// when the scalar input is the operation's identity (+inf for min, -inf for
// max), so the encoding can ignore src1 and reduce src2 alone. Flags killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20184
// Double min/max reduction, vectors of 4+ elements, pre-AVX10.2 path.
// Identity-value restriction as in the 2D rule; needs one extra temp (tmp5).
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20205
20206
// Accumulator variant of minmax_reduction2D: dst is also the first input,
// folded in via the 'true' merge flag. No identity-value restriction.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20225
// Accumulator variant of minmax_reductionD (4+ elements): dst is also the
// first input, folded in via the 'true' merge flag.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20244
// AVX10.2 variant of minmax_reduction2D: one temp; unused temp slots of
// reduceDoubleMinMax are passed as xnoreg. No flags kill.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20262
// AVX10.2 variant of minmax_reductionD (4+ elements): two temps; unused
// temp slots are passed as xnoreg. No flags kill.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20280
20281
// AVX10.2 accumulator variant, 2-element double: dst is also the first input
// ('true' merge flag); unused temp slots are passed as xnoreg.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20297
// AVX10.2 accumulator variant, double vectors of 4+ elements: dst is also
// the first input ('true' merge flag); unused temp slots passed as xnoreg.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20313
20314 // ====================VECTOR ARITHMETIC=======================================
20315
20316 // --------------------------------- ADD --------------------------------------
20317
20318 // Bytes vector add
// Byte vector add, SSE-only form (UseAVX == 0): destructive two-operand
// paddb, dst += src.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20328
// Byte vector add, AVX three-operand form: dst = src1 + src2; the VEX/EVEX
// length bits come from vector_length_encoding(this).
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20339
// Byte vector add with a memory operand folded into the instruction;
// only for vectors larger than 8 bytes (see the predicate).
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20351
20352 // Shorts/Chars vector add
// Short/char vector add, SSE-only form (UseAVX == 0): destructive
// two-operand paddw, dst += src.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20362
// Short/char vector add, AVX three-operand form: dst = src1 + src2.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20373
// Short/char vector add with a folded memory operand; only for vectors
// larger than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20385
20386 // Integers vector add
// Int vector add, SSE-only form (UseAVX == 0): destructive two-operand
// paddd, dst += src.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20396
// Int vector add, AVX three-operand form: dst = src1 + src2.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20407
20408
// Int vector add with a folded memory operand; only for vectors larger
// than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20420
20421 // Longs vector add
// Longs vector add, SSE (UseAVX == 0) form.
// paddq is destructive, so the first input is tied to dst.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20431
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20442
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20454
20455 // Floats vector add
// Floats vector add, SSE (UseAVX == 0) form.
// addps is destructive, so the first input is tied to dst.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20465
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20476
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20488
20489 // Doubles vector add
// Doubles vector add, SSE (UseAVX == 0) form.
// addpd is destructive, so the first input is tied to dst.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20499
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20510
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20522
20523 // --------------------------------- SUB --------------------------------------
20524
20525 // Bytes vector sub
// Bytes vector sub, SSE (UseAVX == 0) form.
// psubb is destructive, so the first input (minuend) is tied to dst.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20535
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20546
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20558
20559 // Shorts/Chars vector sub
// Shorts/chars vector sub, SSE (UseAVX == 0) form.
// psubw is destructive, so the first input (minuend) is tied to dst.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20569
20570
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20581
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20593
20594 // Integers vector sub
// Ints vector sub, SSE (UseAVX == 0) form.
// psubd is destructive, so the first input (minuend) is tied to dst.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20604
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20615
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20627
20628 // Longs vector sub
// Longs vector sub, SSE (UseAVX == 0) form.
// psubq is destructive, so the first input (minuend) is tied to dst.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20638
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20649
20650
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20662
20663 // Floats vector sub
// Floats vector sub, SSE (UseAVX == 0) form.
// subps is destructive, so the first input (minuend) is tied to dst.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20673
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20684
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20696
20697 // Doubles vector sub
// Doubles vector sub, SSE (UseAVX == 0) form.
// subpd is destructive, so the first input (minuend) is tied to dst.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20707
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20718
// AVX form with the subtrahend folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20730
20731 // --------------------------------- MUL --------------------------------------
20732
20733 // Byte vector mul
// Byte multiply for vectors of at most 8 bytes. x86 has no packed byte
// multiply, so the bytes are widened to words (pmovsxbw, an SSE4.1
// instruction — hence the UseSSE > 3 assert), multiplied as words, the high
// byte of each word is cleared with a shift pair, and the results are
// narrowed back to bytes with packuswb.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Keep only the low byte of each 16-bit product, then pack back to bytes.
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20750
// Byte multiply, SSE path for vectors wider than 8 bytes. The odd- and
// even-indexed byte lanes are multiplied separately inside 16-bit lanes
// (shifts isolate each half) and the two results are OR-ed together.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements: shift them into the low byte of each word,
    // multiply, then shift the products back into the high byte.
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements: multiply in place, then clear the high byte of
    // each word with a shift pair.
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine the odd and even halves.
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20775
// Byte multiply, AVX path for vectors wider than 8 bytes. Same odd/even
// lane trick as vmulB, but non-destructive: the partial products are built
// in the temps and only the final OR writes dst.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements: products end up in the high byte of each word.
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements: multiply, then clear the high byte of each word.
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine the odd and even halves into dst.
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20797
20798 // Shorts/Chars vector mul
// Shorts/chars vector mul, SSE (UseAVX == 0) form.
// pmullw is destructive, so the first input is tied to dst.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20808
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20819
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20831
20832 // Integers vector mul
// Ints vector mul, SSE form. pmulld is an SSE4.1 instruction, hence the
// UseSSE > 3 assert; it is destructive, so the first input is tied to dst.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20843
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20854
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20866
20867 // Longs vector mul
// Direct 64-bit element multiply using AVX-512DQ's vpmullq: either a full
// 512-bit vector with AVX512DQ, or any length when AVX512VL+DQ is available.
// ins_cost(500) makes the cheaper vmuludq_reg/vmuldq_reg rules (cost 100)
// win when the inputs are known to fit in 32 bits.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20882
// Memory-operand variant of evmulL_reg. Unlike the register form, the
// VL+DQ case additionally requires vectors wider than 8 bytes — presumably
// the usual restriction on folded memory operands (see vaddS_mem; TODO
// confirm).
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20898
// Longs vector mul, SSE path. No 64x64 multiply exists pre-AVX-512DQ, so
// the product is composed from 32-bit multiplies:
//   result = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
// pshufd with 0xB1 swaps the two 32-bit halves of each 64-bit lane.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are of concern.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    // Sum the two cross products and shift them into the high half.
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Add the full-width lo-lo products.
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20920
// Longs vector mul, AVX path used when vpmullq is unavailable (no AVX512DQ
// for 64-byte vectors, no AVX512VL+DQ for shorter ones). Same decomposition
// as vmulL: lo-lo product plus cross products shifted into the high half.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of concern.
    // vpshufd 0xB1 swaps the 32-bit halves of each 64-bit lane.
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Add the full-width lo-lo products.
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20945
// Fast path for long multiply when both inputs are known to be
// zero-extended 32-bit values (MulVL::has_uint_inputs): a single vpmuludq
// computes the exact 64-bit product. ins_cost(100) makes this preferred
// over the generic 500-cost long-multiply rules.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20957
// Fast path for long multiply when both inputs are known to be
// sign-extended 32-bit values (MulVL::has_int_inputs): a single signed
// vpmuldq computes the exact 64-bit product. ins_cost(100) makes this
// preferred over the generic 500-cost long-multiply rules.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20969
20970 // Floats vector mul
// Floats vector mul, SSE (UseAVX == 0) form.
// mulps is destructive, so the first input is tied to dst.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20980
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20991
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21003
21004 // Doubles vector mul
// Doubles vector mul, SSE (UseAVX == 0) form.
// mulpd is destructive, so the first input is tied to dst.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21014
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21025
// AVX form with the second input folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21037
21038 // --------------------------------- DIV --------------------------------------
21039
21040 // Floats vector div
// Floats vector div, SSE (UseAVX == 0) form.
// divps is destructive, so the dividend is tied to dst.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21050
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21061
// AVX form with the divisor folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21073
21074 // Doubles vector div
// Doubles vector div, SSE (UseAVX == 0) form.
// divpd is destructive, so the dividend is tied to dst.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21084
// AVX non-destructive three-operand form; vlen_enc selects the
// 128/256/512-bit encoding.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21095
// AVX form with the divisor folded from memory; restricted to vectors
// wider than 8 bytes (see note on vaddS_mem).
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21107
21108 // ------------------------------ MinMax ---------------------------------------
21109
21110 // Byte, Short, Int vector Min/Max
// Byte/short/int Min and Max share one SSE rule; MacroAssembler::pminmax
// dispatches on the ideal opcode (MinV vs MaxV) and the element type.
// The SSE encodings are destructive, so one input is tied to dst.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21126
// Byte/short/int Min and Max, AVX non-destructive form; the MacroAssembler
// dispatches on the ideal opcode and element type.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21142
21143 // Long vector Min/Max
// Long Min/Max, SSE path (128-bit vectors only). Min and Max are
// commutative, so the two match rules tie a different input to dst.
// tmp is pinned to xmm0 (rxmm0) — presumably required as the implicit
// mask register of the underlying blend; confirm in MacroAssembler::pminmax.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21162
// Long Min/Max, AVX path for <= 32-byte vectors when AVX512VL is absent.
// Operands use legVec — presumably to keep allocation in registers
// encodable without EVEX since no native 64-bit min/max instruction is
// available here (TODO confirm).
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21180
// Long Min/Max, EVEX path: either a full 64-byte vector or any length when
// AVX512VL is available; the MacroAssembler dispatches on the ideal opcode.
// Fix: the format string previously read "src2" without the '$' prefix, so
// the third operand was never substituted in -XX:+PrintOptoAssembly output;
// now consistent with the sibling min/max rules.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21199
21200 // Float/Double vector Min/Max
// Float/double Min/Max on AVX10.2 hardware: a single vminmax_fp call with
// k0 (i.e. no masking) — presumably the new AVX10.2 minmax instruction
// handles Java's NaN/-0.0 semantics directly, so no temps are needed here
// unlike the pre-AVX10.2 rules below (TODO confirm in MacroAssembler).
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21215
21216 // Float/Double vector Min/Max
// Float/double Min/Max, pre-AVX10.2 path for <= 32-byte vectors. Needs
// three vector temps; vminmax_fp composes compare/blend sequences —
// presumably to implement Java's semantics for NaN and ±0.0, which plain
// minps/maxps do not provide (confirm in MacroAssembler::vminmax_fp).
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21238
// Float/double Min/Max, pre-AVX10.2 EVEX path for 64-byte vectors. Uses an
// opmask temp (ktmp) plus two vector temps; evminmax_fp composes masked
// compare/blend sequences — presumably for Java's NaN/±0.0 semantics, as in
// minmaxFP_reg (confirm in MacroAssembler::evminmax_fp).
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21259
21260 // ------------------------------ Unsigned vector Min/Max ----------------------
21261
// Unsigned vector min/max, register-register form, for integral element
// types. T_LONG is only taken here when AVX512VL is available; otherwise
// the long case falls to vector_uminmaxq_reg below.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();   // Op_UMinV or Op_UMaxV
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Same as vector_uminmax_reg but with the second operand folded from memory
// (LoadVector matched directly into the instruction).
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21291
// Unsigned min/max for long vectors when AVX512VL is unavailable:
// delegates to the vpuminmaxq macro-assembler sequence, which needs
// two scratch vectors to emulate the unsigned 64-bit compare.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();   // Op_UMinV or Op_UMaxV
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21305
// Masked unsigned min/max; dst serves as both the first input and the
// destination. The trailing 'true' argument to evmasked_op presumably
// selects merge-masking (unset lanes keep dst's value) — NOTE(review):
// confirm against the evmasked_op definition.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of the masked unsigned min/max above.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21333
21334 // --------------------------------- Signum/CopySign ---------------------------
21335
// Scalar float signum. dst holds the input and receives the result; the
// zero and one operands are matcher-supplied constants consumed by the
// signum_fp masm routine. Condition flags are clobbered (KILL cr).
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar double signum; identical structure to signumF_reg.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21357
// Vector signum, AVX path: vectors up to 32 bytes on hardware without
// AVX512VL; needs one vector scratch register.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();   // Op_SignumVF or Op_SignumVD
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector signum, EVEX path: AVX512VL hardware (any length) or 64-byte
// vectors; uses an opmask scratch register instead of a vector temp.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21387
21388 // ---------------------------------------
21389 // For copySign use 0xE4 as writemask for vpternlog
21390 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21391 // C (xmm2) is set to 0x7FFFFFFF
21392 // Wherever xmm2 is 0, we want to pick from B (sign)
21393 // Wherever xmm2 is 1, we want to pick from A (src)
21394 //
21395 // A B C Result
21396 // 0 0 0 0
21397 // 0 0 1 0
21398 // 0 1 0 1
21399 // 0 1 1 0
21400 // 1 0 0 0
21401 // 1 0 1 1
21402 // 1 1 0 1
21403 // 1 1 1 1
21404 //
21405 // Result going from high bit to low bit is 0x11100100 = 0xe4
21406 // ---------------------------------------
21407
// Scalar float copy-sign via VPTERNLOGD with imm 0xE4 (see the truth table
// above): tmp1 is loaded with the 0x7FFFFFFF magnitude mask, so the result
// takes the magnitude bits from $dst and the sign bit from $src.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);           // magnitude mask (sign bit clear)
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);  // move mask GP -> XMM
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar double copy-sign, same ternary-logic scheme with the 64-bit
// magnitude mask. The immD zero operand only constrains matching; it is
// not referenced in the encoding.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);  // magnitude mask (sign bit clear)
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21432
21433 //----------------------------- CompressBits/ExpandBits ------------------------
21434
// Int CompressBits maps directly to the BMI2 PEXT (parallel bit extract)
// instruction. The isa_int predicate keeps this from matching the long
// variants declared elsewhere.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int ExpandBits maps directly to the BMI2 PDEP (parallel bit deposit)
// instruction.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// PEXT with the mask operand folded from memory.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// PDEP with the mask operand folded from memory.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21474
21475 // --------------------------------- Sqrt --------------------------------------
21476
// Single-precision floating point vector sqrt.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form; restricted to vectors wider than 8 bytes so a full
// vector load from the given address is valid.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Double-precision floating point vector sqrt.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form of the double-precision vector sqrt.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21523
21524 // ------------------------------ Shift ---------------------------------------
21525
21526 // Left and right shift count vectors are the same on x86
21527 // (only lowest bits of xmm reg are used for count).
// Materializes a GP shift count into the low bits of an XMM register.
// One instruct serves both LShiftCntV and RShiftCntV (see comment above).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21537
21538 // Byte vector shift
// Byte vector shift for vectors of <= 8 bytes with a uniform (non-variable)
// count. x86 has no byte shifts, so bytes are widened to words, shifted as
// words, masked back into byte range and re-packed.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);   // logical right shift zero-extends
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);          // keep the low byte of each word
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);      // words back to bytes
  %}
  ins_pipe( pipe_slow );
%}
21558
// 16-byte vector shift on pre-AVX2 hardware: the two 8-byte halves are
// widened to words, shifted independently, masked and packed back together.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);   // logical right shift zero-extends
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);     // lower half -> words
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);         // bring upper half down
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);                // mask words to byte range
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);            // merge halves back to bytes
  %}
  ins_pipe( pipe_slow );
%}
21583
// 16-byte vector shift with AVX2: widen all 16 bytes to one 256-bit word
// vector, shift, mask, then pack the two 128-bit lanes back into 16 bytes.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);   // logical right shift zero-extends
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);  // 0 == AVX_128bit
  %}
  ins_pipe( pipe_slow );
%}
21604
// 32-byte vector shift with AVX2: each 16-byte half is widened to a 256-bit
// word vector and shifted; after the in-lane vpackuswb, vpermq with 0xD8
// restores the correct 64-bit lane order.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);   // logical right shift zero-extends
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);   // upper 16 bytes
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);  // fix lane interleave
  %}
  ins_pipe( pipe_slow );
%}
21629
21630 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21631 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21632 match(Set dst ( LShiftVB src shift));
21633 match(Set dst (RShiftVB src shift));
21634 match(Set dst (URShiftVB src shift));
21635 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21636 format %{"vector_byte_shift $dst,$src,$shift" %}
21637 ins_encode %{
21638 assert(UseAVX > 2, "required");
21639 int opcode = this->ideal_Opcode();
21640 bool sign = (opcode != Op_URShiftVB);
21641 int vlen_enc = Assembler::AVX_512bit;
21642 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21643 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21644 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21645 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21646 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21647 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21648 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21649 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21650 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21651 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21652 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21653 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21654 %}
21655 ins_pipe( pipe_slow );
21656 %}
21657
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into ints with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift
// Short/char vector shift with a uniform count. AVX shifts directly into
// dst; the SSE fallback first copies src to dst with a move sized to the
// vector length (4/8/16 bytes) and shifts in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);    // 2 shorts = 4 bytes
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);    // 4 shorts = 8 bytes
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);    // 8 shorts = 16 bytes
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21692
// Integers vector shift
// Int vector shift with a uniform count; same AVX/SSE split as vshiftS,
// operating on dwords.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);    // 2 ints = 8 bytes
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);    // 4 ints = 16 bytes
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21720
// Integers vector constant shift
// Int vector shift with an immediate count: matches the shift-count node
// directly so the count is encoded into the instruction.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);    // 2 ints = 8 bytes
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);    // 4 ints = 16 bytes
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21746
21747 // Longs vector shift
// Long vector shift: left and logical right only — arithmetic right shift
// of longs is handled by the vshiftL_arith_* instructs below.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
// Immediate-count variant of the long shift above.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21786
21787 // -------------------ArithmeticRightShift -----------------------------------
21788 // Long vector arithmetic right shift
// Long arithmetic right shift without AVX-512 VPSRAQ: emulated via the
// identity (x >> s) == ((x >>> s) ^ m) - m, where m is the long sign mask
// logically shifted by the same count.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);     // logical shift of value
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);     // logical shift of sign mask
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);        // xor/sub pair sign-extends
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Long arithmetic right shift on AVX-512: native EVPSRAQ, no emulation.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21827
21828 // ------------------- Variable Shift -----------------------------
21829 // Byte variable shift
// Variable (per-lane) byte shift, <= 8 bytes, without AVX512BW: varshiftbw
// widens bytes to words and shifts, then the word result is packed back to
// bytes (vpackuswb imm 0 == AVX_128bit).
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21849
// Variable byte shift, 16 bytes, without AVX512BW: the lower and upper
// 8-byte halves are shifted separately (vpshufd 0xE moves the upper half
// down) and the word results are packed back into one byte vector.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21877
// Variable byte shift, 32 bytes, without AVX512BW: each 128-bit half is
// processed as in vshift16B_var_nobw and the two byte results are merged
// with vinserti128. Needs four vector temps.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21913
// Variable byte shift with AVX512BW, <= 32 bytes: evarshiftb handles the
// whole vector with one masm sequence and a single vector temp.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Variable byte shift with AVX512BW, 64 bytes: processed as two 256-bit
// halves, then recombined with vinserti64x4.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21955
21956 // Short variable shift
21957 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21958 predicate(Matcher::vector_length(n) <= 8 &&
21959 n->as_ShiftV()->is_var_shift() &&
21960 !VM_Version::supports_avx512bw());
21961 match(Set dst ( LShiftVS src shift));
21962 match(Set dst ( RShiftVS src shift));
21963 match(Set dst (URShiftVS src shift));
21964 effect(TEMP dst, TEMP vtmp);
21965 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21966 ins_encode %{
21967 assert(UseAVX >= 2, "required");
21968
21969 int opcode = this->ideal_Opcode();
21970 bool sign = (opcode != Op_URShiftVS);
21971 int vlen_enc = Assembler::AVX_256bit;
21972 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21973 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21974 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21975 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21976 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21977 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21978 %}
21979 ins_pipe( pipe_slow );
21980 %}
21981
// Variable short shift, 16 shorts, without AVX512BW: each 128-bit half is
// widened to a 256-bit dword vector and shifted per lane; the results are
// packed back with vpackusdw and the 64-bit lanes reordered via vpermq 0xD8.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);   // logical right shift zero-extends
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);   // fix lane interleave
  %}
  ins_pipe( pipe_slow );
%}
22017
// Variable (per-element) shift of shorts when AVX512BW is available:
// a single variable-shift instruction handles all 16-bit lanes directly
// (MacroAssembler::varshiftw), no widening dance needed.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    // Without AVX512VL only the full 512-bit EVEX encoding is usable,
    // so force it; upper lanes beyond the actual vector are ignored.
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22037
// Integer variable shift
// Variable (per-element) shift of ints — AVX2+ supports this natively,
// so one macro-assembler call covers left/arith-right/logical-right.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22054
// Long variable shift
// Variable (per-element) left / logical-right shift of longs.
// Arithmetic right shift is NOT matched here — it needs extra handling
// on AVX2 and is covered by the vshiftL_arith_var* instructs below.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22070
// Long variable arithmetic right shift
// Variable arithmetic right shift of up to 4 longs on AVX2 only.
// A temp vector is passed to the macro-assembler helper, which emulates
// the missing 64-bit arithmetic shift (no vpsravq before AVX-512).
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22087
// Variable arithmetic right shift of longs on AVX-512 (UseAVX > 2):
// handled directly by the macro-assembler, no temp register required
// (cf. the AVX2-only vshiftL_arith_var above).
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22100
22101 // --------------------------------- AND --------------------------------------
22102
// Bitwise AND of two vectors, pre-AVX (SSE) — destructive two-operand form,
// hence dst appears as both input and output in the match rule.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22112
// Bitwise AND of two vectors, AVX three-operand (non-destructive) form.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22123
// Bitwise AND with the second operand folded from memory.
// Restricted to vectors larger than 8 bytes so the memory access matches
// the full vector width of the instruction.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22135
22136 // --------------------------------- OR ---------------------------------------
22137
// Bitwise OR of two vectors, pre-AVX (SSE) — destructive two-operand form.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22147
// Bitwise OR of two vectors, AVX three-operand (non-destructive) form.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22158
// Bitwise OR with the second operand folded from memory (vectors > 8 bytes,
// so the memory operand width matches the instruction's vector width).
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22170
22171 // --------------------------------- XOR --------------------------------------
22172
// Bitwise XOR of two vectors, pre-AVX (SSE) — destructive two-operand form.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22182
// Bitwise XOR of two vectors, AVX three-operand (non-destructive) form.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22193
// Bitwise XOR with the second operand folded from memory (vectors > 8 bytes,
// so the memory operand width matches the instruction's vector width).
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22205
22206 // --------------------------------- VectorCast --------------------------------------
22207
// Cast a byte vector to a wider element type. The T_DOUBLE target is only
// taken here when AVX512VL is present; otherwise vcastBtoD below handles it
// with legacy-range registers.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22219
// Byte -> double cast when AVX512VL is NOT available: legVec operands keep
// the allocation to the legacy XMM register range usable without VL.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22230
// Narrow up to 8 shorts to bytes without AVX512VLBW: mask each short lane
// to its low 8 bits, then pack down. Source fits in 128 bits, so no temp
// register is needed.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Clear the high byte of each short so unsigned saturation in
    // vpackuswb cannot clamp the value.
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
22245
// Narrow 16 shorts (256-bit source) to bytes without AVX512VLBW: mask each
// short lane to 8 bits, then pack the two 128-bit halves into dst.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    // Clear high bytes so unsigned saturation in vpackuswb cannot clamp.
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
22263
// Short -> X cast using EVEX down-convert or sign-extending widen.
// Covers narrowing to byte on AVX512VLBW, and all widening casts
// (dst element at least as wide as src element).
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // evpmovwb on sub-512-bit vectors needs AVX512VL; force the
        // 512-bit encoding when VL is missing.
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Widen to int first, then convert int -> float.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // vcvtdq2pd doubles the element width, so the intermediate int
        // vector is half the width of the final double vector.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22302
// Narrow an int vector (up to 128-bit source) to byte or short on AVX2 and
// below: mask lanes to the target width, then pack down with unsigned
// saturating packs (masking first makes saturation a no-op).
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      // int -> byte needs two packing steps: int->short, then short->byte.
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22327
// Narrow a 256-bit int vector to byte or short on AVX2 and below.
// Same mask-then-pack approach as castItoX, but the two 128-bit halves
// must be combined explicitly via vextracti128.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      // Final short->byte pack operates on the 128-bit result only.
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22355
// Int -> X cast for AVX-512 targets, or any widening cast (dst element at
// least as wide as src element). Narrowing uses EVEX down-converts;
// widening uses sign-extends / int-to-FP conversions.
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        // EVEX down-convert on sub-512-bit vectors needs AVX512VL;
        // fall back to the 512-bit encoding when VL is missing.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22395
// Narrow a long vector to byte or short on AVX2 and below: shuffle the low
// 32 bits of each long together, mask to the target width, then pack.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      // Shuffle control 8 (0b00_00_10_00) gathers the low dwords of the
      // two longs into the bottom of the register.
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      // 256-bit source: gather low dwords within each lane, then permute
      // the qwords so all four land in the low 128 bits.
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    if (to_elem_bt == T_BYTE) {
      // Extra pack step for the byte target: short -> byte.
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
22425
// Long -> X cast for AVX-512, and the int/float/double targets generally.
// Narrowing integral targets use EVEX down-converts; FP targets require
// AVX512DQ (vcvtqq2ps / vcvtqq2pd).
instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        // EVEX down-convert on sub-512-bit vectors needs AVX512VL.
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          // One long -> one int: a 4-byte move suffices.
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          // Two longs: gather the low dwords with a shuffle.
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            // AVX2 path: in-lane shuffle then cross-lane qword permute.
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22485
// Float -> double widening cast: direct vcvtps2pd, no fixup needed.
instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22496
22497
// Float -> integral cast (element size <= 4 bytes) on AVX without VL and
// without AVX10.2. The macro-assembler helper uses the temporaries and the
// sign-flip constant — presumably for Java-semantics fixup of NaN and
// out-of-range inputs; see MacroAssembler::vector_castF2X_avx.
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
    // 32 bit addresses for register indirect addressing mode since stub constants
    // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
    // However, targets are free to increase this limit, but having a large code cache size
    // greater than 2G looks unreasonable in practical scenario, on the hind side with given
    // cap we save a temporary register allocation which in limiting case can prevent
    // spilling in high register pressure blocks.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22523
// Float -> integral cast on AVX-512 (VL available, or full 512-bit vector),
// without AVX10.2. Uses opmask temporaries; the T_LONG target takes a
// dedicated helper since the result vector is wider than the source.
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      // Widening: encode by the (wider) destination length.
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      // Same-or-narrower targets: encode by the source length.
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22547
// Float -> integral cast on AVX10.2: single helper call, no temporaries or
// fixup constants needed (handled by the AVX10.2 conversion instructions —
// see MacroAssembler::vector_castF2X_avx10_2).
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // T_LONG widens, so encode by destination length; otherwise by source.
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22560
// Float -> integral cast on AVX10.2 with the source folded from memory.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // T_LONG widens (encode by dst); otherwise derive the encoding from
    // the float source's byte size (element count * sizeof(float)).
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22574
// Double -> float narrowing cast: direct vcvtpd2ps, encoded by the (wider)
// source vector length.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22585
// Double -> integral cast on AVX without VL and without AVX10.2. Five
// vector temporaries feed the macro-assembler helper, which uses the
// sign-flip constant — presumably for NaN/out-of-range fixup; see
// MacroAssembler::vector_castD2X_avx.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22603
// Double -> integral cast on AVX-512 (VL available, or full 512-bit
// vector), without AVX10.2. The sign-flip constant is chosen per AVX512DQ
// availability, matching the conversion path the helper takes.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                                                                ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22621
// Double -> integral cast on AVX10.2: single helper call, no temporaries
// or fixup constants needed.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    // Encode by the (wider) source length — doubles always narrow or stay.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22634
// Double -> integral cast on AVX10.2 with the source folded from memory.
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    // Derive the encoding from the double source's byte size
    // (element count * sizeof(double)).
    int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22648
// Unsigned (zero-extending) widening cast from byte/short/int sources;
// the from/to element types are read off the node and dispatched inside
// MacroAssembler::vector_unsigned_cast.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22664
// Math.round on a float vector -> int vector, AVX path (no AVX512VL,
// sub-512-bit). The helper temporarily installs a custom MXCSR value
// loaded from the constant table — NOTE(review): 0x3F80/0x3FBF presumably
// select the rounding/exception-mask bits the round semantics need, with
// the EnableX86ECoreOpts variant differing only in mask bits; confirm
// against MacroAssembler::vector_round_float_avx.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22681
// Math.round on a float vector -> int vector, EVEX path (AVX512VL or a
// full 512-bit vector). Same custom-MXCSR technique as vround_float_avx,
// with opmask registers instead of extra vector temporaries.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22698
// Math.round on a double vector -> long vector (EVEX only — no predicate
// on AVX level here, so this form must only be reachable on targets where
// the helper's instructions exist).
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22713
22714 // --------------------------------- VectorMaskCmp --------------------------------------
22715
// Float/double vector compare producing a vector-of-all-ones/all-zeros result
// (no vectmask type). Handles 8..32-byte vectors via AVX vcmpps/vcmppd.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    // Translate the BoolTest condition constant into an SSE/AVX FP predicate.
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22734
// 512-bit float/double compare when the result type is a plain vector (not a
// vectmask): compare into a k-register temp, then expand the mask into the
// destination vector using a masked move of all-bits-set.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      // Materialize -1 in lanes selected by ktmp, 0 elsewhere (merge = false).
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22756
// Float/double compare whose result type is a vectmask: compare directly into
// the destination k-register — no vector expansion needed.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22775
// Signed integral vector compare for conditions x86 supports directly
// (eq/lt/gt): a single vpcmpCCW with no scratch register (xnoreg).
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22795
// Signed integral vector compare for conditions x86 lacks directly
// (ne/le/ge): vpcmpCCW uses $xtmp to negate the complementary compare.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22816
// Unsigned integral vector compare without AVX-512: flip the sign bit of both
// operands (XOR with a broadcast high-bit constant) so a signed compare gives
// the unsigned ordering.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // 64-bit pattern with the per-element sign bit set, from the constant pool.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // Broadcast the flip pattern across the vector (ddup for 128-bit, bcast otherwise).
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // Bias both operands, then do the signed compare.
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22843
// 512-bit integral compare when the result is a plain vector: compare into a
// k-register temp (signed or unsigned form), then expand the mask into the
// destination with a masked all-bits-set move.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        // evpcmp*'s boolean flag selects signed semantics, hence !is_unsigned.
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22877
22878
// Integral compare whose result is a vectmask: compare straight into the
// destination k-register for byte/short/int/long elements.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is performed directly into the destination mask register;
    // the boolean argument of evpcmp* selects signed semantics (!is_unsigned).
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22916
22917 // Extract
22918
// Extract a byte/short/int element from a <=16-byte vector into a GPR.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22933
// Extract a byte/short/int element from a 32- or 64-byte vector: first pull
// the containing 128-bit lane into $vtmp, then extract from that lane.
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22951
// Extract a long element from a <=2-element vector into a GPR (needs SSE4.1).
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22964
// Extract a long element from a 4- or 8-element vector: isolate the 128-bit
// lane into $vtmp, then extract from the lane.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22979
// Extract a float element from a <=4-element vector into an XMM register.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22992
// Extract a float element from an 8- or 16-element vector: isolate the
// 128-bit lane into $vtmp, then extract from the lane.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23007
// Extract a double element from a 2-element vector into an XMM register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23019
// Extract a double element from a 4- or 8-element vector: isolate the
// 128-bit lane into $vtmp, then extract from the lane.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
23034
23035 // --------------------------------- Vector Blend --------------------------------------
23036
// SSE4.1 blend (no AVX): pblendvb implicitly reads its mask from xmm0, so the
// mask is copied into the xmm0-bound temp first when needed.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
23052
// AVX blend for integral elements, <=32-byte vectors, when E-Core opts are
// off: single vpblendvb on the byte-granular mask.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23066
// AVX blend for float/double elements, <=32-byte vectors, when E-Core opts
// are off: vblendvps selects per 32-bit lane based on the mask sign bits.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23080
// AVX blend variant for E-Core optimized builds: avoids vpblendvb/vblendvps
// by composing the result with andn/and/or on the mask.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // dst = (mask & src2) | (~mask & src1)
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23096
// 512-bit blend with a vector (not vectmask) selector: convert the all-ones
// lanes of $mask into a k-register via evpcmp-eq, then do a masked blend.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
     int vlen_enc = Assembler::AVX_512bit;
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23111
23112
// Blend whose selector is already a vectmask in a k-register: one masked
// evpblend. Subword elements additionally require AVX512BW.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23126
23127 // --------------------------------- ABS --------------------------------------
23128 // a = |a|
// Absolute value of packed bytes: pabsb for <=16 lanes, vpabsb otherwise.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB  src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23143
// Absolute value of packed shorts: pabsw for <=8 lanes, vpabsw otherwise.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS  src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23158
// Absolute value of packed ints: pabsd for <=4 lanes, vpabsd otherwise.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI  src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23173
// Absolute value of packed longs: evpabsq is AVX-512 only; without AVX512VL
// the instruction must be emitted at 512-bit length.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL  src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23187
23188 // --------------------------------- ABSNEG --------------------------------------
23189
// Float abs/neg via sign-mask: the ideal opcode (AbsVF vs NegVF) selects the
// operation inside vabsnegf. The 4-lane case is handled by vabsneg4F below.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23209
// 4-lane float abs/neg done in place (dst is both input and output).
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23222
// Double abs/neg via sign-mask: the ideal opcode (AbsVD vs NegVD) selects the
// operation inside vabsnegd; 2 lanes use the non-vlen-encoded form.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23239
23240 //------------------------------------- VectorTest --------------------------------------------
23241
// VectorTest on vectors shorter than 16 bytes: delegates to the vectortest
// macro, which needs a scratch XMM register for sub-16-byte inputs.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}
23254
// VectorTest on vectors of 16 bytes or more: no scratch register needed.
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
23266
// "All true" test (BoolTest::overflow) on a mask of <=8 lanes: copy the mask
// to a GPR, clear bits beyond masklen, and compare against the full-mask
// value so flags reflect all-lanes-set.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}
23282
// "Any true" test (BoolTest::ne) on a mask of <=8 lanes: copy the mask to a
// GPR and AND with the lane mask; ZF then indicates whether any lane is set.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}
23297
// Mask test for >=16 lanes (or 8 lanes with AVX512DQ): a single kortest sets
// the flags from the mask register directly.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
23309
23310 //------------------------------------- LoadMask --------------------------------------------
23311
// VectorLoadMask into a vector result (no vectmask type, no AVX512VL+BW):
// expands a byte mask to full-lane-width via the load_vector_mask helper.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
23324
// VectorLoadMask into a k-register on targets without AVX512VL+BW: the
// helper operates at 512-bit width using $xtmp as scratch.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23336
// VectorLoadMask into a k-register with AVX512VL+BW available: uses the
// source's natural vector-length encoding instead of forcing 512 bits.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23349
23350 //------------------------------------- StoreMask --------------------------------------------
23351
// VectorStoreMask for 1-byte elements: normalizing 0/-1 lanes to 0/1 is just
// a byte absolute value (pabsb/vpabsb).
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23369
// VectorStoreMask for 2-byte elements: narrow shorts to bytes (pack) and
// normalize to 0/1 with a byte abs.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Fold the upper 128-bit half down, pack shorts to bytes, then normalize.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23392
// VectorStoreMask for 4-byte elements (pre-AVX512 path): narrow ints down to
// bytes via two packing steps, normalizing to 0/1 along the way.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Fold the upper 128-bit half down, then pack ints -> shorts -> bytes.
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23418
// VectorStoreMask for 8-byte elements, 2-element vectors, SSE path: select
// one dword per qword lane, then pack dword->word->byte to get 0/1 bytes.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    // Imm 0x8 = dwords {0,2,0,0}: gathers the low dword of each qword lane.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23434
// VectorStoreMask for 8-byte elements, 4-element (256-bit) vectors on AVX/AVX2:
// compress the qword mask to one dword per lane across both 128-bit lanes,
// then pack down to 0/1 bytes.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // 0x88 picks the even dwords of each 128-bit lane (low dword per qword).
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    // Merge the upper lane's pair of dwords into elements 2..3 of dst.
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    // Turn 0/-1 bytes into 0/1 bytes.
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23452
// VectorStoreMask for 4-byte elements on AVX-512 when the mask is in a vector
// register (not a kReg): truncate dwords to bytes with VPMOVDB, then abs to
// normalize 0/-1 to 0/1.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    // Without AVX512VL the 128/256-bit forms of evpmovdb are unavailable,
    // so emit the full 512-bit form instead.
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23468
// VectorStoreMask for 8-byte elements on AVX-512 when the mask is in a vector
// register (not a kReg): truncate qwords to bytes with VPMOVQB, then abs to
// normalize 0/-1 to 0/1.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    // Without AVX512VL only the 512-bit form of evpmovqb is legal.
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23484
// VectorStoreMask from an opmask (kReg) source without AVX512VL+BW: expand
// the mask to a 512-bit dword vector via a masked load of all-ones bits,
// then truncate to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    // This path only fires for full 512-bit (16 x int) masks.
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    // dst = masked load of per-lane comparison bit pattern (1 in active lanes).
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23498
// VectorStoreMask from an opmask (kReg) source with AVX512VL+BW: expand the
// mask bits to 0/-1 bytes with VPMOVM2B, then abs to get 0/1 bytes.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23511
// VectorMaskCast on an opmask register: a pure type change, no bits move,
// so no code is emitted and the cost is zero.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23521
// VectorMaskCast between vector masks of identical byte size: the in-register
// representation is unchanged, so this is a zero-cost no-op.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23532
// VectorMaskCast between masks of different byte sizes: the lane widths
// differ, so delegate to the macro-assembler to widen/narrow the mask lanes.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}
23545
23546 //-------------------------------- Load Iota Indices ----------------------------------
23547
// Load the iota sequence (0, 1, 2, ...) for this vector shape from the
// constant table; src is the constant-zero starting index.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}
23558
// PopulateIndex with an int start value: dst[i] = src1 + i * src2, where the
// step src2 is constrained to the constant 1 (asserted below).
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // vtmp = broadcast(src1); dst = iota; dst = iota + src1.
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23574
// PopulateIndex with a long start value: identical sequence to the int
// variant above, but src1 comes from a 64-bit general register.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // vtmp = broadcast(src1); dst = iota; dst = iota + src1.
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23590
23591 //-------------------------------- Rearrange ----------------------------------
23592
23593 // LoadShuffle/Rearrange for Byte
// Byte rearrange for vectors up to 128 bits (< 32 lanes): a single PSHUFB
// suffices since all indices stay within one 128-bit lane. dst is both
// source and destination.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23605
// Byte rearrange for 256-bit vectors without AVX512_VBMI: AVX2's vpshufb
// only shuffles within 128-bit lanes, so shuffle both the original and the
// lane-swapped src, then blend based on which lane each index targets.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23627
23628
// Byte rearrange for vectors wider than 256 bits without AVX512_VBMI:
// delegate the multi-step lane-crossing byte shuffle to the macro-assembler,
// which needs three vector temps, an opmask temp, and a scratch GPR.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23643
// Byte rearrange for >= 256-bit vectors with AVX512_VBMI: VPERMB performs the
// full cross-lane byte permutation in a single instruction.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23655
23656 // LoadShuffle/Rearrange for Short
23657
// VectorLoadShuffle for shorts without AVX512BW: these targets only have a
// byte shuffle (pshufb), so convert each short index k into the byte-index
// pair (2k, 2k+1) that addresses both bytes of the short.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      // 128-bit only unless AVX2 provides 256-bit integer shifts.
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23698
// Short rearrange for <= 8 lanes without AVX512BW: the shuffle was already
// expanded to byte indices by loadShuffleS, so a single PSHUFB applies it.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23710
// Short rearrange for 16 lanes (256 bits) without AVX512BW: same
// two-shuffle-and-blend technique as rearrangeB_avx, operating on the
// byte-expanded shuffle produced by loadShuffleS.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23732
// Short rearrange with AVX512BW: VPERMW does the full cross-lane word
// permutation directly.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Without AVX512VL the sub-512-bit forms of vpermw are unavailable,
    // so widen the operation to 512 bits.
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23747
23748 // LoadShuffle/Rearrange for Integer and Float
23749
// VectorLoadShuffle for int/float, 4 lanes, pure SSE: expand each 32-bit
// index k into the four byte indices (4k, 4k+1, 4k+2, 4k+3) so a later
// PSHUFB can emulate a dword shuffle.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    // 0xA0 replicates the low word of each dword into both word positions.
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23779
// Int/float rearrange on pure SSE: apply the byte-expanded shuffle from
// loadShuffleI with a single PSHUFB.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23791
// Int/float rearrange on AVX and newer: delegate to the macro-assembler,
// which selects the appropriate permute for the element type and length.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23804
23805 // LoadShuffle/Rearrange for Long and Double
23806
// VectorLoadShuffle for long/double without AVX512VL: only a dword permute
// (vpermd) is available, so expand each 64-bit index k into the dword index
// pair (2k, 2k+1).
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23832
// Long/double rearrange without AVX512VL: apply the dword-expanded shuffle
// from loadShuffleL with VPERMD (cross-lane dword permute).
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23846
// Long/double rearrange with AVX-512 (8 lanes, or any length with AVX512VL):
// VPERMQ does the qword permutation directly.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // The variable-index vpermq has no 128-bit form; widen to 256 bits.
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23863
23864 // --------------------------------- FMA --------------------------------------
23865 // a * b + c
23866
// Packed-float fused multiply-add, all operands in registers: c = a * b + c.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23878
// Packed-float FMA with the multiplier loaded straight from memory:
// c = a * mem(b) + c. Restricted to vectors wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23891
// Packed-double fused multiply-add, all operands in registers: c = a * b + c.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23903
// Packed-double FMA with the multiplier loaded straight from memory:
// c = a * mem(b) + c. Restricted to vectors wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23916
23917 // --------------------------------- Vector Multiply Add --------------------------------------
23918
// MulAddVS2VI (short pairs multiplied then added into ints), SSE form:
// two-operand PMADDWD accumulates into dst in place.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23928
// MulAddVS2VI, AVX form: three-operand VPMADDWD, dst need not alias a source.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23939
23940 // --------------------------------- Vector Multiply Add Add ----------------------------------
23941
// Fused multiply-add-add: folds (MulAddVS2VI src1 src2) + dst into a single
// AVX512_VNNI VPDPWSSD. The low ins_cost(10) makes it preferred over the
// separate pmaddwd + add pattern when VNNI is available.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23954
23955 // --------------------------------- PopCount --------------------------------------
23956
// Unmasked vector popcount (int or long lanes) on targets where the popcount
// predicate holds; delegates to the EVEX macro-assembler helper with k0 (no
// masking) and merge semantics enabled.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // k0 means "no mask"; the merge flag is irrelevant in that case.
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23970
// Masked vector popcount (int or long lanes): dst is first seeded with src so
// that, with merge semantics enabled, lanes cleared in the mask keep the src
// value — TODO confirm merge intent against the macro-assembler helper.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23984
// Vector popcount fallback for targets where the EVEX popcount predicate does
// not hold: uses the AVX lookup-table sequence in the macro-assembler, which
// needs two vector temps and a scratch GPR.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24000
24001 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
24002
// CountTrailingZerosV for non-subword types on EVEX targets where the
// lzcnt-based predicate holds; unused temp slots are passed as xnoreg/k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24018
// CountTrailingZerosV for short elements on AVX512CD targets (full 512-bit
// vectors, or any length with AVX512VL): needs three vector temps plus a
// scratch GPR.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24035
// CountTrailingZerosV for byte elements on AVX512VL+BW targets: the byte
// variant of the helper needs four vector temps, an opmask temp, and a
// scratch GPR.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24051
// CountTrailingZerosV fallback for sub-512-bit vectors on machines without
// AVX512VL: uses the AVX sequence with three vector temps and a scratch GPR.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24065
24066
24067 // --------------------------------- Bitwise Ternary Logic ----------------------------------
24068
// MacroLogicV: three-input bitwise ternary logic. The 8-bit immediate func
// is the truth table applied per bit across dst, src2 and src3 (VPTERNLOGD).
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24079
// MacroLogicV with the third operand loaded directly from memory; restricted
// to vectors wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24091
24092 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (left or right) by an 8-bit immediate count; the ideal
// opcode distinguishes the direction inside the macro-assembler helper.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24105
// Vector rotate (left or right) by per-lane variable counts held in a vector
// register; direction again comes from the ideal opcode.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24118
24119 // ---------------------------------- Masked Operations ------------------------------------
24120 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
24121 predicate(!n->in(3)->bottom_type()->isa_vectmask());
24122 match(Set dst (LoadVectorMasked mem mask));
24123 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24124 ins_encode %{
24125 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24126 int vlen_enc = vector_length_encoding(this);
24127 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
24128 %}
24129 ins_pipe( pipe_slow );
24130 %}
24131
24132
24133 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
24134 predicate(n->in(3)->bottom_type()->isa_vectmask());
24135 match(Set dst (LoadVectorMasked mem mask));
24136 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
24137 ins_encode %{
24138 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
24139 int vector_len = vector_length_encoding(this);
24140 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
24141 %}
24142 ins_pipe( pipe_slow );
24143 %}
24144
24145 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
24146 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
24147 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24148 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24149 ins_encode %{
24150 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24151 int vlen_enc = vector_length_encoding(src_node);
24152 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24153 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
24154 %}
24155 ins_pipe( pipe_slow );
24156 %}
24157
24158 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
24159 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
24160 match(Set mem (StoreVectorMasked mem (Binary src mask)));
24161 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
24162 ins_encode %{
24163 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
24164 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
24165 int vlen_enc = vector_length_encoding(src_node);
24166 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
24167 %}
24168 ins_pipe( pipe_slow );
24169 %}
24170
// Debug check: trap if `addr` is not aligned as required by `mask`
// (i.e. if any of the masked low bits of the address are set).
// Emits test+branch around a __ stop(), so flags are clobbered (KILL cr).
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);          // aligned: skip the trap
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
24185
// Masked vector comparison (VectorCmpMasked): compares src1 and src2 under
// `mask` and returns -1 in dst if all masked lanes are equal, otherwise the
// index of the first differing lane (via tzcnt on the inverted equality mask).
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);   // ktmp2 = lanes outside the mask
    __ mov64($dst$$Register, -1L);                    // optimistic result: all equal
    // ktmp1 = (src1 == src2), computed only under `mask`
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // CF is set iff (ktmp2 | ktmp1) is all-ones, i.e. every lane is either
    // unmasked or compared equal -> keep dst == -1.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // Some masked lane differs: locate the first zero bit of the equality
    // mask (NOTE(review): relies on `mask` covering a lane prefix so that
    // unmasked lanes sit above the first mismatch — confirm with callers).
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
24210
24211
24212 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
24213 match(Set dst (VectorMaskGen len));
24214 effect(TEMP temp, KILL cr);
24215 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
24216 ins_encode %{
24217 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
24218 %}
24219 ins_pipe( pipe_slow );
24220 %}
24221
24222 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
24223 match(Set dst (VectorMaskGen len));
24224 format %{ "vector_mask_gen $len \t! vector mask generator" %}
24225 effect(TEMP temp);
24226 ins_encode %{
24227 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
24228 __ kmovql($dst$$KRegister, $temp$$Register);
24229 %}
24230 ins_pipe( pipe_slow );
24231 %}
24232
// VectorMaskToLong, EVEX flavor: mask is a true opmask (kReg). The shared
// vector_mask_operation helper dispatches on ideal_Opcode(); dst doubles as
// its own temp register.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);   // mask footprint in bytes
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskToLong, boolean-vector flavor: mask is held as a byte/boolean
// vector in an XMM register (no vectmask type), needs an XMM temp.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskToLong, AVX flavor: matches through an intervening
// VectorStoreMask so the store-mask conversion is elided.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24281
// VectorMaskTrueCount (population count of set mask lanes), EVEX flavor:
// mask is an opmask register; uses a GP temp for the helper.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);   // mask footprint in bytes
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskTrueCount, boolean-vector flavor (mask in an XMM register).
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskTrueCount, AVX flavor: folds away a VectorStoreMask conversion.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24330
// VectorMaskFirstTrue / VectorMaskLastTrue, EVEX flavor. One instruct serves
// both ideal ops; the helper dispatches on ideal_Opcode().
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);   // mask footprint in bytes
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// First/last-true, boolean-vector flavor (mask in an XMM register).
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// First/last-true, AVX flavor: folds away a VectorStoreMask conversion.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24382
24383 // --------------------------------- Compress/Expand Operations ---------------------------
24384 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24385 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24386 match(Set dst (CompressV src mask));
24387 match(Set dst (ExpandV src mask));
24388 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24389 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24390 ins_encode %{
24391 int opcode = this->ideal_Opcode();
24392 int vlen_enc = vector_length_encoding(this);
24393 BasicType bt = Matcher::vector_element_basic_type(this);
24394 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24395 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24396 %}
24397 ins_pipe( pipe_slow );
24398 %}
24399
24400 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24401 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24402 match(Set dst (CompressV src mask));
24403 match(Set dst (ExpandV src mask));
24404 format %{ "vector_compress_expand $dst, $src, $mask" %}
24405 ins_encode %{
24406 int opcode = this->ideal_Opcode();
24407 int vector_len = vector_length_encoding(this);
24408 BasicType bt = Matcher::vector_element_basic_type(this);
24409 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24410 %}
24411 ins_pipe( pipe_slow );
24412 %}
24413
24414 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24415 match(Set dst (CompressM mask));
24416 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24417 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24418 ins_encode %{
24419 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24420 int mask_len = Matcher::vector_length(this);
24421 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24422 %}
24423 ins_pipe( pipe_slow );
24424 %}
24425
24426 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24427
24428 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24429 predicate(!VM_Version::supports_gfni());
24430 match(Set dst (ReverseV src));
24431 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24432 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24433 ins_encode %{
24434 int vec_enc = vector_length_encoding(this);
24435 BasicType bt = Matcher::vector_element_basic_type(this);
24436 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24437 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24438 %}
24439 ins_pipe( pipe_slow );
24440 %}
24441
24442 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24443 predicate(VM_Version::supports_gfni());
24444 match(Set dst (ReverseV src));
24445 effect(TEMP dst, TEMP xtmp);
24446 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24447 ins_encode %{
24448 int vec_enc = vector_length_encoding(this);
24449 BasicType bt = Matcher::vector_element_basic_type(this);
24450 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24451 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24452 $xtmp$$XMMRegister);
24453 %}
24454 ins_pipe( pipe_slow );
24455 %}
24456
24457 instruct vreverse_byte_reg(vec dst, vec src) %{
24458 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24459 match(Set dst (ReverseBytesV src));
24460 effect(TEMP dst);
24461 format %{ "vector_reverse_byte $dst, $src" %}
24462 ins_encode %{
24463 int vec_enc = vector_length_encoding(this);
24464 BasicType bt = Matcher::vector_element_basic_type(this);
24465 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24466 %}
24467 ins_pipe( pipe_slow );
24468 %}
24469
24470 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24471 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24472 match(Set dst (ReverseBytesV src));
24473 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24474 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24475 ins_encode %{
24476 int vec_enc = vector_length_encoding(this);
24477 BasicType bt = Matcher::vector_element_basic_type(this);
24478 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24479 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24480 %}
24481 ins_pipe( pipe_slow );
24482 %}
24483
24484 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24485
24486 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24487 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24488 Matcher::vector_length_in_bytes(n->in(1))));
24489 match(Set dst (CountLeadingZerosV src));
24490 format %{ "vector_count_leading_zeros $dst, $src" %}
24491 ins_encode %{
24492 int vlen_enc = vector_length_encoding(this, $src);
24493 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24494 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24495 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24496 %}
24497 ins_pipe( pipe_slow );
24498 %}
24499
24500 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24501 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24502 Matcher::vector_length_in_bytes(n->in(1))));
24503 match(Set dst (CountLeadingZerosV src mask));
24504 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24505 ins_encode %{
24506 int vlen_enc = vector_length_encoding(this, $src);
24507 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24508 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24509 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24510 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24511 %}
24512 ins_pipe( pipe_slow );
24513 %}
24514
24515 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24516 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24517 VM_Version::supports_avx512cd() &&
24518 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24519 match(Set dst (CountLeadingZerosV src));
24520 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24521 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24522 ins_encode %{
24523 int vlen_enc = vector_length_encoding(this, $src);
24524 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24525 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24526 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24527 %}
24528 ins_pipe( pipe_slow );
24529 %}
24530
24531 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24532 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24533 match(Set dst (CountLeadingZerosV src));
24534 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24535 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24536 ins_encode %{
24537 int vlen_enc = vector_length_encoding(this, $src);
24538 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24539 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24540 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24541 $rtmp$$Register, true, vlen_enc);
24542 %}
24543 ins_pipe( pipe_slow );
24544 %}
24545
24546 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24547 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24548 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24549 match(Set dst (CountLeadingZerosV src));
24550 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24551 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24552 ins_encode %{
24553 int vlen_enc = vector_length_encoding(this, $src);
24554 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24555 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24556 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24557 %}
24558 ins_pipe( pipe_slow );
24559 %}
24560
24561 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24562 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24563 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24564 match(Set dst (CountLeadingZerosV src));
24565 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24566 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24567 ins_encode %{
24568 int vlen_enc = vector_length_encoding(this, $src);
24569 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24570 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24571 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24572 %}
24573 ins_pipe( pipe_slow );
24574 %}
24575
24576 // ---------------------------------- Vector Masked Operations ------------------------------------
24577
24578 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24579 match(Set dst (AddVB (Binary dst src2) mask));
24580 match(Set dst (AddVS (Binary dst src2) mask));
24581 match(Set dst (AddVI (Binary dst src2) mask));
24582 match(Set dst (AddVL (Binary dst src2) mask));
24583 match(Set dst (AddVF (Binary dst src2) mask));
24584 match(Set dst (AddVD (Binary dst src2) mask));
24585 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24586 ins_encode %{
24587 int vlen_enc = vector_length_encoding(this);
24588 BasicType bt = Matcher::vector_element_basic_type(this);
24589 int opc = this->ideal_Opcode();
24590 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24591 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24592 %}
24593 ins_pipe( pipe_slow );
24594 %}
24595
24596 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24597 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24598 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24599 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24600 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24601 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24602 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24603 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24604 ins_encode %{
24605 int vlen_enc = vector_length_encoding(this);
24606 BasicType bt = Matcher::vector_element_basic_type(this);
24607 int opc = this->ideal_Opcode();
24608 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24609 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24610 %}
24611 ins_pipe( pipe_slow );
24612 %}
24613
// Masked vector XOR, register-register form.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector XOR with a memory second operand.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector OR, register-register form.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector OR with a memory second operand.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector AND, register-register form.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector AND with a memory second operand.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24691
// Masked vector subtract (all element types), register-register form.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector subtract with a memory second operand.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector multiply (no byte variant — MulVB is absent), reg-reg form.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector multiply with a memory second operand.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24761
// Masked vector square root (float/double). Unary: dst serves as both
// source and destination, so it is passed twice to evmasked_op.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector divide (float/double only), register-register form.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector divide with a memory second operand.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24803
24804
// Masked vector rotate by an 8-bit immediate count. One rule covers both
// rotate directions; evmasked_op distinguishes them via the ideal opcode
// (RotateLeftV vs RotateRightV).
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    // The rotate count is passed as a plain constant rather than a register.
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector rotate with per-use rotate counts held in a vector register.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24832
// Masked vector left shift by an immediate count (the LShiftCntV wrapper marks
// a broadcast scalar shift count in the ideal graph).
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector left shift where all lanes share one (register) count.
// The predicate excludes variable per-lane shifts, handled by the rule below;
// the trailing 'false' flags the non-variable-shift form to evmasked_op.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vector left shift with a distinct count per lane (is_var_shift());
// the trailing 'true' selects the variable-shift lowering in evmasked_op.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24879
// Masked arithmetic (signed) right shift by an immediate count.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked arithmetic right shift, uniform register count (not per-lane);
// trailing 'false' = non-variable shift form.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

// Masked arithmetic right shift with per-lane counts; trailing 'true' selects
// the variable-shift lowering.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24926
// Masked logical (unsigned) right shift by an immediate count. Note the count
// node is RShiftCntV here as well — the ideal graph uses the same count wrapper
// for both signed and unsigned right shifts.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked logical right shift, uniform register count; trailing 'false' =
// non-variable shift form.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

// Masked logical right shift with per-lane counts; trailing 'true' selects the
// variable-shift lowering.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24973
// Masked lane-wise maximum, register-register form. Element type is taken from
// the node, so one rule covers all MaxV element types.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked lane-wise maximum with the second operand loaded from memory.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked lane-wise minimum, register-register form.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked lane-wise minimum with the second operand loaded from memory.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25025
// Masked vector rearrange (permute by the index vector in src2). Unlike the
// arithmetic rules above, evmasked_op is called with 'false' as its merge flag
// — NOTE(review): presumably zero-masking (masked-off lanes cleared) rather
// than merge-masking; confirm against the evmasked_op implementation.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked absolute value for all integral element widths; dst is used as every
// operand since AbsV is a unary operation updated in place.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25054
// Masked fused multiply-add: dst = fma(dst, src2, src3) under $mask
// (float/double). Requires the FMA feature, asserted at encode time.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    // dst doubles as the accumulator input; src2/src3 are the multiplicands.
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked fused multiply-add with the third operand loaded from memory.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25084
// Masked vector compare producing a predicate (k) register: for each lane where
// $mask is set, compare src1 and src2 under the condition encoded in $cond and
// set the corresponding bit of $dst; lanes where $mask is clear produce 0.
// Integer element types dispatch to EVEX vpcmp{b,w,d,q} with a signedness flag
// derived from the booltest predicate; float/double use vcmpps/vcmppd predicates.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    // Length/type come from src1: dst is a mask register, not a vector.
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Select the comparison instruction by element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
25134
// MaskAll: broadcast an int condition into every bit of a mask register, for
// mask lengths up to 32 (source fits in a 32-bit GPR).
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Mask NOT recognized as (XorVMask src (MaskAll -1)) for sub-byte mask lengths
// (< 8) with AVX512DQ. Needs a scratch k-register and a scratch GPR because
// knot on a partial mask must avoid flipping bits beyond the mask length.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Mask NOT for lengths where a whole k-register granule can be complemented
// directly (8 with DQ, 16 always, >16 with BW) — no temporaries required.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
25170
// VectorLongToMask (long bits -> boolean-vector mask) on AVX targets without
// native k-registers (bottom type is not a vectmask), for <= 8 mask bits.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    // NOTE(review): xnoreg is passed here even though $xtmp is declared as a
    // TEMP above — presumably the helper needs no XMM scratch for mask_len <= 8;
    // confirm against vector_long_to_maskvec before removing the TEMP.
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}


// Same conversion for more than 8 (up to 32) mask bits; here the XMM scratch
// register is actually passed to the helper, and flags are clobbered.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// On EVEX targets the mask lives in a k-register, so the conversion is a
// single GPR -> k move.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
25210
// Bitwise AND/OR/XOR between two mask (k) registers. The asserts check both
// inputs carry the same mask type; the ideal opcode selects the operation.
// NOTE(review): $kscratch is declared as TEMP but not referenced in the
// encoding — presumably reserved for masked_op's internal use or historical;
// confirm before relying on it.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    // Without AVX512DQ there is no byte-granular k-op; widen sub-16 masks to 16.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
25227
// Masked three-input boolean function (vpternlog): $func is the 8-bit truth
// table applied lane-wise to dst/src2/src3 under $mask, register form.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked vpternlog with the third operand loaded from memory.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25251
// CastVV on a mask (k) register: a compile-time type cast only — emits no code
// (size 0, empty encoding, zero cost).
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// CastVV on an ordinary vector register: likewise a zero-size no-op.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// CastVV on a legacy-encodable vector register class: same zero-size no-op.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25284
// IsInfiniteF via vfpclassss: immediate 0x18 tests the +infinity (0x08) and
// -infinity (0x10) classes; the resulting 1-bit mask is moved to a GPR.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// IsInfiniteD via vfpclasssd, same 0x18 (+/- infinity) class immediate.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25308
// Signed saturating add/sub for byte/short elements — x86 has direct
// instructions (padds/psubs family), so a single helper call suffices.
// The 'false' argument selects the signed variant.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating add/sub for byte/short elements (paddus/psubus family);
// the 'true' argument selects the unsigned variant.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25340
// Signed saturating add/sub for int/long elements on EVEX targets (512-bit
// vectors, or narrower with AVX512VL). No direct instruction exists for these
// widths, so a helper sequence using two XMM and two mask temporaries detects
// overflow and clamps.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Signed saturating add/sub for int/long on pre-AVX512VL hardware (<= 256-bit
// vectors); uses four XMM temporaries instead of mask registers.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25379
// Unsigned saturating add for int/long elements on EVEX targets; overflow
// (carry) detection needs two XMM temporaries and one mask register.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating add for int/long on pre-AVX512VL hardware (<= 256-bit
// vectors); mask registers replaced by a third XMM temporary.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25413
// Unsigned saturating subtract for int/long elements on EVEX targets; only a
// mask temporary is needed (no TEMP dst, unlike the add variants above —
// presumably the helper never reads dst before its final write; confirm
// against vector_sub_dq_saturating_unsigned_evex).
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating subtract for int/long on pre-AVX512VL hardware
// (<= 256-bit vectors), using two XMM temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25447
// Signed saturating add/sub for byte/short with the second operand loaded from
// memory; mirrors the register form above ('false' = signed).
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating add/sub for byte/short with a memory operand
// ('true' = unsigned).
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25479
// Mask-predicated saturating signed add/sub of subword (byte/short) vectors,
// register-register form. dst doubles as the first input (Binary dst src),
// so lanes where the mask is clear keep dst's prior value.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  // Only byte/short element types, and only signed saturating nodes.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: 'false' = signed saturation; trailing 'true' presumably selects
    // merge-masking in evmasked_saturating_op — confirm against the helper.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25494
// Mask-predicated saturating unsigned add/sub of subword (byte/short) vectors,
// register-register form. dst doubles as the first input (Binary dst src).
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  // Only byte/short element types, and only unsigned saturating nodes.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: first 'true' = unsigned saturation; second 'true' presumably
    // selects merge-masking — confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25509
// Mask-predicated saturating signed add/sub of subword (byte/short) vectors
// with a memory operand. dst doubles as the first input (Binary dst ...).
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  // Only byte/short element types, and only signed saturating nodes.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  // "_mem" suffix added: the previous format text was identical to the
  // register variant's, making PrintOptoAssembly output ambiguous.
  format %{ "vector_addsub_saturating_subword_masked_mem $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: 'false' = signed saturation; trailing 'true' presumably selects
    // merge-masking in evmasked_saturating_op — confirm against the helper.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25524
// Mask-predicated saturating unsigned add/sub of subword (byte/short) vectors
// with a memory operand. dst doubles as the first input (Binary dst ...).
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  // Only byte/short element types, and only unsigned saturating nodes.
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  // "_mem" suffix added: the previous format text was identical to the
  // register variant's, making PrintOptoAssembly output ambiguous.
  format %{ "vector_addsub_saturating_unsigned_subword_masked_mem $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: first 'true' = unsigned saturation; second 'true' presumably
    // selects merge-masking — confirm against evmasked_saturating_op.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25539
// Select lanes from two source vectors according to a per-lane index vector.
// Note: 'index' is both input and result (Set index ...), so the index vector
// is overwritten with the selected elements.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Delegates to the EVEX two-table permute helper in the macro assembler.
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25551
// Bitwise reinterpret a 16-bit short held in a GPR as a half-float in an XMM
// register, using vmovw (no value conversion, just a register-file move).
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25561
// Bitwise reinterpret a half-float held in an XMM register as a 16-bit short
// in a GPR, using vmovw (inverse of reinterpretS2HF; no value conversion).
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25571
// Fused pattern: float -> half-float conversion whose result is immediately
// reinterpreted as HF. Emitting vcvtps2ph directly keeps the value in an XMM
// register, avoiding the round-trip through a GPR the two separate nodes imply.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    // imm 0x04: take rounding control from the immediate, RC = round-to-nearest-even.
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25581
// Fused pattern: half-float reinterpreted as short and then widened to float.
// Emitting vcvtph2ps directly keeps the value in an XMM register, avoiding the
// GPR round-trip the two separate nodes imply (inverse of convF2HFAndS2HF).
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25591
// Scalar half-float (FP16) square root via vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25601
// Scalar half-float (FP16) add/div/mul/sub. One rule covers all four ideal
// opcodes; efp16sh() dispatches on the ideal opcode to emit the matching
// scalar FP16 instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25615
// Scalar half-float min/max using the AVX10.2 MINMAX instruction, which
// handles NaN/signed-zero semantics in hardware — no temporaries needed
// (contrast with the pre-AVX10.2 fallback rule below).
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    // Select the MINMAX immediate encoding by ideal opcode (min vs max,
    // sign-aware compare).
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}
25628
// Scalar half-float min/max fallback for CPUs without AVX10.2. The multi-
// instruction sequence in scalar_max_min_fp16() needs a mask register and two
// XMM temporaries; TEMP_DEF dst keeps dst from aliasing the inputs.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25643
// Scalar half-float fused multiply-add: dst = dst * src1 + src2.
// Uses the 132-form (vfmadd132sh dst, src2, src1), which matches the
// dst-as-multiplicand operand arrangement of the match rule.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25654
25655
// Packed half-float (FP16) square root, register source.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25666
// Packed half-float square root with memory source. The VectorReinterpret in
// the pattern folds the raw-load-plus-reinterpret shape into a single
// memory-operand instruction.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25677
// Packed half-float add/div/mul/sub, register-register form. One rule covers
// all four ideal opcodes; evfp16ph() dispatches on the ideal opcode.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25692
25693
// Packed half-float add/div/mul/sub with memory second operand. The
// VectorReinterpret wrapper folds load-plus-reinterpret into the instruction's
// memory operand (see vector_sqrt_HF_mem).
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25708
// Packed half-float fused multiply-add: dst = dst * src1 + src2.
// Uses the 132-form (evfmadd132ph dst, src2, src1) to match the
// dst-as-multiplicand operand arrangement of the match rule.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25719
// Packed half-float fused multiply-add with memory multiplier:
// dst = dst * mem[src1] + src2. The VectorReinterpret wrapper folds
// load-plus-reinterpret into the instruction's memory operand.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25730
// Packed half-float min/max with memory second operand using the AVX10.2
// MINMAX instruction (hardware NaN/signed-zero handling; no temporaries).
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Select the MINMAX immediate encoding by ideal opcode (min vs max,
    // sign-aware compare).
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    // k0 = no masking; the 'true' flag's exact meaning lives in evminmaxph —
    // confirm against the macro assembler.
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25744
// Packed half-float min/max, register-register form, using the AVX10.2
// MINMAX instruction (hardware NaN/signed-zero handling; no temporaries).
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Select the MINMAX immediate encoding by ideal opcode (min vs max,
    // sign-aware compare).
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    // k0 = no masking; the 'true' flag's exact meaning lives in evminmaxph —
    // confirm against the macro assembler.
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25758
// Packed half-float min/max fallback for CPUs without AVX10.2. The multi-
// instruction sequence in vector_max_min_fp16() needs a mask register and two
// vector temporaries; TEMP_DEF dst keeps dst from aliasing the inputs.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    // NOTE(review): src2 is passed before src1 here, unlike the avx10_2 rules
    // above — presumably matching vector_max_min_fp16()'s parameter order;
    // confirm against the helper's signature.
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25774
25775 //----------PEEPHOLE RULES-----------------------------------------------------
25776 // These must follow all instruction definitions as they use the names
25777 // defined in the instructions definitions.
25778 //
25779 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate holds
25781 //
25782 // peepmatch ( root_instr_name [preceding_instruction]* );
25783 //
25784 // peepprocedure ( procedure_name );
25785 // // provide a procedure name to perform the optimization, the procedure should
25786 // // reside in the architecture dependent peephole file, the method has the
25787 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25788 // // with the arguments being the basic block, the current node index inside the
25789 // // block, the register allocator, the functions upon invoked return a new node
25790 // // defined in peepreplace, and the rules of the nodes appearing in the
25791 // // corresponding peepmatch, the function return true if successful, else
25792 // // return false
25793 //
25794 // peepconstraint %{
25795 // (instruction_number.operand_name relational_op instruction_number.operand_name
25796 // [, ...] );
25797 // // instruction numbers are zero-based using left to right order in peepmatch
25798 //
25799 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25800 // // provide an instruction_number.operand_name for each operand that appears
25801 // // in the replacement instruction's match rule
25802 //
25803 // ---------VM FLAGS---------------------------------------------------------
25804 //
25805 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25806 //
25807 // Each peephole rule is given an identifying number starting with zero and
25808 // increasing by one in the order seen by the parser. An individual peephole
25809 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25810 // on the command-line.
25811 //
25812 // ---------CURRENT LIMITATIONS----------------------------------------------
25813 //
25814 // Only transformations inside a basic block (do we need more for peephole)
25815 //
25816 // ---------EXAMPLE----------------------------------------------------------
25817 //
25818 // // pertinent parts of existing instructions in architecture description
25819 // instruct movI(rRegI dst, rRegI src)
25820 // %{
25821 // match(Set dst (CopyI src));
25822 // %}
25823 //
25824 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25825 // %{
25826 // match(Set dst (AddI dst src));
25827 // effect(KILL cr);
25828 // %}
25829 //
25830 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25831 // %{
25832 // match(Set dst (AddI dst src));
25833 // %}
25834 //
25835 // 1. Simple replacement
25836 // - Only match adjacent instructions in same basic block
25837 // - Only equality constraints
25838 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25839 // - Only one replacement instruction
25840 //
25841 // // Change (inc mov) to lea
25842 // peephole %{
25843 // // lea should only be emitted when beneficial
25844 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25845 // // increment preceded by register-register move
25846 // peepmatch ( incI_rReg movI );
25847 // // require that the destination register of the increment
25848 // // match the destination register of the move
25849 // peepconstraint ( 0.dst == 1.dst );
25850 // // construct a replacement instruction that sets
25851 // // the destination to ( move's source register + one )
25852 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25853 // %}
25854 //
25855 // 2. Procedural replacement
// - More flexible in finding relevant nodes
25857 // - More flexible constraints
25858 // - More flexible transformations
25859 // - May utilise architecture-dependent API more effectively
25860 // - Currently only one replacement instruction due to adlc parsing capabilities
25861 //
25862 // // Change (inc mov) to lea
25863 // peephole %{
25864 // // lea should only be emitted when beneficial
25865 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25866 // // the rule numbers of these nodes inside are passed into the function below
25867 // peepmatch ( incI_rReg movI );
25868 // // the method that takes the responsibility of transformation
25869 // peepprocedure ( inc_mov_to_lea );
25870 // // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
25871 // // node is passed into the function above
25872 // peepreplace ( leaI_rReg_immI() );
25873 // %}
25874
// These instructions are not matched by the matcher but are used by the peephole rules
// Peephole-only rule (predicate(false) keeps the matcher from selecting it):
// 32-bit add of two registers emitted as leal, so dst need not alias an input
// and no flags are clobbered.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    // rbp/r13 cannot serve as a displacement-free base (their ModRM base
    // encodings imply a displacement), so put the other register in the base
    // slot when necessary.
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25894
// Peephole-only rule (predicate(false)): 32-bit add of register + immediate
// emitted as leal, so dst need not alias the input and no flags are clobbered.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25905
// Peephole-only rule (predicate(false)): 32-bit left shift by a small constant
// (immI2) emitted as leal with a scale factor, avoiding a flags-clobbering shl
// and allowing dst != src.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    // Shift count 1/2/3 casts directly to ScaleFactor times_2/times_4/times_8.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    // For x*2, prefer [src + src] over [noreg + src*2]: the base-less form
    // forces a 32-bit displacement into the encoding. Not applicable when src
    // is rbp/r13 (cannot be a displacement-free base).
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25922
// Peephole-only rule (predicate(false)): 64-bit analogue of
// leaI_rReg_rReg_peep — register + register add emitted as leaq.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    // rbp/r13 cannot serve as a displacement-free base (their ModRM base
    // encodings imply a displacement), so swap base and index when necessary.
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25941
// Peephole-only rule (predicate(false)): 64-bit add of register + 32-bit
// immediate emitted as leaq (immL32 limits the constant to a signed-32 disp).
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25952
// Peephole-only rule (predicate(false)): 64-bit analogue of
// leaI_rReg_immI2_peep — left shift by a small constant emitted as leaq.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    // Shift count 1/2/3 casts directly to ScaleFactor times_2/times_4/times_8.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    // For x*2, prefer [src + src] over the base-less form, which forces a
    // 32-bit displacement; not applicable when src is rbp/r13.
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25969
25970 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25971 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25972 // processors with at least partial ALU support for lea
25973 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25974 // beneficial for processors with full ALU support
25975 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25976
// mov + addI(reg,reg) -> leal [reg + reg]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

// mov + addI(reg,imm) -> leal [reg + imm]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// mov + incI -> leal [reg + 1]; 3-operand lea must be fast (or Cascade Lake)
// for this to pay off.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// mov + decI -> leal [reg - 1]; same CPU gating as the inc rule.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// mov + salI(imm 1..3) -> leal [reg << shift]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
26018
// mov + addL(reg,reg) -> leaq [reg + reg]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

// mov + addL(reg,imm32) -> leaq [reg + imm]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// mov + incL -> leaq [reg + 1]; 3-operand lea must be fast (or Cascade Lake)
// for this to pay off.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// mov + decL -> leaq [reg - 1]; same CPU gating as the inc rule.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// mov + salL(imm 1..3) -> leaq [reg << shift]
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
26060
// The next three rules hand compressed-oop lea forms to lea_remove_redundant,
// which drops the instruction when it finds the lea to be redundant
// (no peepreplace: removal, not substitution).
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
26078
// These peephole rules match instructions which set flags and are followed by a testI/L_reg
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction
26081
// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
26095
26096
26097 //----------SMARTSPILL RULES---------------------------------------------------
26098 // These must follow all instruction definitions as they use the names
26099 // defined in the instructions definitions.