//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
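//
// A minimal illustrative sketch (not HotSpot code; the enum and helper
// predicates below are hypothetical) of how the four save types map onto
// spill obligations:
//
//   enum SaveType { NS, SOC, SOE, AS };
//
//   bool must_save_at_entry(SaveType t) { return t == SOE || t == AS; }
//   bool must_save_at_call (SaveType t) { return t == SOC || t == AS; }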

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
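//
// An illustrative note (the helper below is hypothetical, not ADLC code):
// allocation walks chunk0 in the order written, so scratch registers such as
// R10 and R11 are preferred, while registers bound to fixed calling
// conventions (RAX, RBP, R13-R15) and RSP come last. A pair such as
// (RAX, RAX_H) holds one 64-bit value, so its low half must land on an
// even allocation slot:
//
//   bool pair_starts_aligned(int lo_slot) { return (lo_slot % 2) == 0; }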

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The full-width registers are used by the SSE4.2 intrinsics, the array
// copy stubs and the superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   no registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters
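//
// A conceptual sketch of that ABI difference (illustrative helper, not a
// HotSpot API; on Windows only the low 128 bits of XMM6-XMM15 are preserved):
//
//   bool xmm_callee_saved(int idx) {
//   #ifdef _WIN64
//     return idx >= 6 && idx <= 15;  // Windows x64: XMM6-XMM15 callee-saved
//   #else
//     return false;                  // System V: all XMM registers volatile
//   #endif
//   }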

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1  (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H(SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2  (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H(SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3  (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H(SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4  (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H(SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5  (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H(SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6  (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H(SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7  (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H(SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
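//
// The %{ ... %} form returns a RegMask computed at VM startup instead of a
// fixed register list, so registers can be dropped when a feature (e.g. the
// APX extended GPRs) is unavailable. A conceptual sketch, with hypothetical
// mask and helper names:
//
//   static RegMask _PTR_REG_mask;                  // built once at startup
//   static void init_ptr_reg_mask() {
//     _PTR_REG_mask = _ALL_REG_mask;                              // every GPR
//     _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg())); // minus RSP
//   }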

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for EVEX float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
// reg_class_dynamic picks the first (EVEX) class when the predicate holds at
// VM startup, and falls back to the second (legacy) class otherwise.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted (pre-EVEX) 512-bit vector registers: XMM0-XMM15 only
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
// Returns true when both bounds of the CastLL's long type either fit in a
// signed 32-bit immediate or are unbounded (min_jlong/max_jlong), so the
// range check can be emitted with 32-bit immediate compares.
bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
// R12 holds the heap base when compressed oops are in use, so it must be
// kept out of the allocatable register masks in that configuration.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
1528
1529 void reg_mask_init() {
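  // r16-r31 are the APX extended GPRs (EGPRs). When UseAPX is off they are
  // stripped from the pointer and integer masks below (and from everything
  // derived from those masks).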
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when the compiled code uses wide vectors or asks for
  // the upper AVX state to be cleared before calls.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
1629
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
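// (vzeroupper has a fixed 3-byte VEX encoding: c5 f8 77)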
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
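  // (a direct call is E8 + rel32: 1 opcode byte + 4 displacement bytes)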
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
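  // (a 10-byte movabs that materializes the inline-cache value, followed by
  // the 5-byte direct call)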
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
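  // (the movq is 10 bytes: REX.W + B8+r + imm64; the indirect call through
  // r10 is 3 bytes: REX.B + FF /2)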
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
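  // Example: a NaN operand sets ZF=PF=CF=1; and-ing the saved flags with
  // 0x2b clears ZF and PF while keeping CF=1, which subsequent compares
  // read as 'less than'.
  //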
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);                   // assume 'less than'; also the unordered (NaN) result
  __ jcc(Assembler::parity, done);    // PF=1: unordered, keep -1
  __ jcc(Assembler::below, done);     // CF=1: less than, keep -1
  __ setcc(Assembler::notEqual, dst); // 0 if equal, 1 if greater
  __ bind(done);
}
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
  __ bind(zero);
  // The inputs compared equal but may differ in sign (+0.0 vs -0.0).
  // Combining the sign bits makes -0.0 win for min (OR) and +0.0 win for
  // max (AND).
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table may be emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove a word each for the return address (already pushed) and RBP.
  framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove a word each for the return address (already pushed) and RBP.
  framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
enum RC {
  rc_bad,   // invalid / not a register
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask register
  rc_float, // XMM register
  rc_stack  // stack slot
};
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
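
// The spill-copy code below dispatches on the (source, destination) pair of
// these classes for each half of the value being moved.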
2037
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2); // vector_len 2 = 512-bit
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
2243 // mem-> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
  } else if (dst_first_rc == rc_kreg) {
    // gpr -> kreg
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
    // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
  } else if (dst_first_rc == rc_int) {
    // kreg -> gpr
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
  } else if (dst_first_rc == rc_kreg) {
    // kreg -> kreg
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill combination");
  Unimplemented();
  return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
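// (leaq reg, [rsp + disp8] encodes as prefix + opcode + ModRM + SIB + disp8:
//  5 bytes with a 1-byte REX prefix, 6 with the 2-byte REX2 prefix required
//  by encodings above 15; a disp32 form adds 3 more bytes.)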
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
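// (XMM0b, XMM0d, XMM0h and XMM0p are the last 32-bit slots of a 64-, 128-,
//  256- and 512-bit span of XMM0, so the (hi, lo) pair covers exactly the
//  returned vector width.)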
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
2649 // this method should return false for offset 0.
2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2651 // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2654 offset -= br_size;
2655
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
2658 if (rule == jmpConUCF2_rule)
2659 return (-126 <= offset && offset <= 125);
2660 return (-128 <= offset && offset <= 127);
2661 }
2662
2663 // Return whether or not this register is ever used as an argument.
2664 // This function is used on startup to build the trampoline stubs in
2665 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
2667 // available to the callee.
2668 bool Matcher::can_be_java_arg(int reg)
2669 {
2670 return
2671 reg == RDI_num || reg == RDI_H_num ||
2672 reg == RSI_num || reg == RSI_H_num ||
2673 reg == RDX_num || reg == RDX_H_num ||
2674 reg == RCX_num || reg == RCX_H_num ||
2675 reg == R8_num || reg == R8_H_num ||
2676 reg == R9_num || reg == R9_H_num ||
2677 reg == R12_num || reg == R12_H_num ||
2678 reg == XMM0_num || reg == XMM0b_num ||
2679 reg == XMM1_num || reg == XMM1b_num ||
2680 reg == XMM2_num || reg == XMM2b_num ||
2681 reg == XMM3_num || reg == XMM3b_num ||
2682 reg == XMM4_num || reg == XMM4b_num ||
2683 reg == XMM5_num || reg == XMM5b_num ||
2684 reg == XMM6_num || reg == XMM6b_num ||
2685 reg == XMM7_num || reg == XMM7b_num;
2686 }
2687
2688 bool Matcher::is_spillable_arg(int reg)
2689 {
2690 return can_be_java_arg(reg);
2691 }
2692
2693 uint Matcher::int_pressure_limit()
2694 {
2695 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2696 }
2697
2698 uint Matcher::float_pressure_limit()
2699 {
// After experimenting with different values, the following default threshold
// works best for LCM's register pressure scheduling on x64.
2702 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2703 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2704 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2705 }
2706
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
// In 64-bit mode, code that uses a multiply when the
// divisor is constant is faster than the hardware
// DIV instruction (it uses MulHiL).
2711 return false;
2712 }
2713
2714 // Register for DIVI projection of divmodI
2715 const RegMask& Matcher::divI_proj_mask() {
2716 return INT_RAX_REG_mask();
2717 }
2718
2719 // Register for MODI projection of divmodI
2720 const RegMask& Matcher::modI_proj_mask() {
2721 return INT_RDX_REG_mask();
2722 }
2723
2724 // Register for DIVL projection of divmodL
2725 const RegMask& Matcher::divL_proj_mask() {
2726 return LONG_RAX_REG_mask();
2727 }
2728
2729 // Register for MODL projection of divmodL
2730 const RegMask& Matcher::modL_proj_mask() {
2731 return LONG_RDX_REG_mask();
2732 }
2733
2734 %}
2735
2736 source_hpp %{
2737 // Header information of the source block.
2738 // Method declarations/definitions which are used outside
2739 // the ad-scope can conveniently be defined here.
2740 //
2741 // To keep related declarations/definitions/uses close together,
2742 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2743
2744 #include "runtime/vm_version.hpp"
2745
2746 class NativeJump;
2747
2748 class CallStubImpl {
2749
2750 //--------------------------------------------------------------
2751 //---< Used for optimization in Compile::shorten_branches >---
2752 //--------------------------------------------------------------
2753
2754 public:
2755 // Size of call trampoline stub.
2756 static uint size_call_trampoline() {
2757 return 0; // no call trampolines on this platform
2758 }
2759
2760 // number of relocations needed by a call trampoline stub
2761 static uint reloc_call_trampoline() {
2762 return 0; // no call trampolines on this platform
2763 }
2764 };
2765
2766 class HandlerImpl {
2767
2768 public:
2769
2770 static int emit_deopt_handler(C2_MacroAssembler* masm);
2771
2772 static uint size_deopt_handler() {
2773 // one call and one jmp.
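// A rel32 call (0xE8 + imm32) is 5 bytes and the trailing jmp is 2 or 5 bytes,
// so 10 bytes is a safe upper bound.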
2774 return 10;
2775 }
2776 };
2777
2778 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
switch (bytes) {
2780 case 4: // fall-through
2781 case 8: // fall-through
2782 case 16: return Assembler::AVX_128bit;
2783 case 32: return Assembler::AVX_256bit;
2784 case 64: return Assembler::AVX_512bit;
2785
2786 default: {
2787 ShouldNotReachHere();
2788 return Assembler::AVX_NoVec;
2789 }
2790 }
2791 }
2792
2793 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2794 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2795 }
2796
2797 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2798 uint def_idx = use->operand_index(opnd);
2799 Node* def = use->in(def_idx);
2800 return vector_length_encoding(def);
2801 }
2802
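// AVX512_BITALG supplies vpopcntb/vpopcntw for sub-word lanes, while
// AVX512_VPOPCNTDQ supplies vpopcntd/vpopcntq for int/long lanes.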
2803 static inline bool is_vector_popcount_predicate(BasicType bt) {
2804 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2805 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2806 }
2807
2808 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2809 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2810 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2811 }
2812
2813 class Node::PD {
2814 public:
2815 enum NodeFlags {
2816 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2817 Flag_sets_carry_flag = Node::_last_flag << 2,
2818 Flag_sets_parity_flag = Node::_last_flag << 3,
2819 Flag_sets_zero_flag = Node::_last_flag << 4,
2820 Flag_sets_overflow_flag = Node::_last_flag << 5,
2821 Flag_sets_sign_flag = Node::_last_flag << 6,
2822 Flag_clears_carry_flag = Node::_last_flag << 7,
2823 Flag_clears_parity_flag = Node::_last_flag << 8,
2824 Flag_clears_zero_flag = Node::_last_flag << 9,
2825 Flag_clears_overflow_flag = Node::_last_flag << 10,
2826 Flag_clears_sign_flag = Node::_last_flag << 11,
2827 _last_flag = Flag_clears_sign_flag
2828 };
2829 };
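// These platform-dependent node flags track which x86 condition codes (EFLAGS
// bits) a machine node is known to set or clear, plus whether the node needs
// the Intel JCC erratum mitigation.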
2830
2831 %} // end source_hpp
2832
2833 source %{
2834
2835 #include "opto/addnode.hpp"
2836 #include "c2_intelJccErratum_x86.hpp"
2837
2838 void PhaseOutput::pd_perform_mach_node_analysis() {
2839 if (VM_Version::has_intel_jcc_erratum()) {
2840 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2841 _buf_sizes._code += extra_padding;
2842 }
2843 }
2844
2845 int MachNode::pd_alignment_required() const {
2846 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2847 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2848 return IntelJccErratum::largest_jcc_size() + 1;
2849 } else {
2850 return 1;
2851 }
2852 }
2853
2854 int MachNode::compute_padding(int current_offset) const {
2855 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2856 Compile* C = Compile::current();
2857 PhaseOutput* output = C->output();
2858 Block* block = output->block();
2859 int index = output->index();
2860 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2861 } else {
2862 return 0;
2863 }
2864 }
2865
2866 // Emit deopt handler code.
2867 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2868
2869 // Note that the code buffer's insts_mark is always relative to insts.
2870 // That's why we must use the macroassembler to generate a handler.
2871 address base = __ start_a_stub(size_deopt_handler());
2872 if (base == nullptr) {
2873 ciEnv::current()->record_failure("CodeCache is full");
2874 return 0; // CodeBuffer::expand failed
2875 }
2876 int offset = __ offset();
2877
2878 Label start;
2879 __ bind(start);
2880
2881 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2882
2883 int entry_offset = __ offset();
2884
2885 __ jmp(start);
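// Entering the handler at entry_offset executes the jmp back to the call;
// the call then pushes the address of that jmp (the handler entry) as the
// return address before entering the deopt blob.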
2886
2887 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2888 __ end_a_stub();
2889 return entry_offset;
2890 }
2891
2892 static Assembler::Width widthForType(BasicType bt) {
2893 if (bt == T_BYTE) {
2894 return Assembler::B;
2895 } else if (bt == T_SHORT) {
2896 return Assembler::W;
2897 } else if (bt == T_INT) {
2898 return Assembler::D;
2899 } else {
2900 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2901 return Assembler::Q;
2902 }
2903 }
2904
2905 //=============================================================================
2906
2907 // Float masks come from different places depending on platform.
2908 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2909 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2910 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2911 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2912 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2913 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2914 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2915 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2916 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2917 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2918 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2919 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2920 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2921 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2922 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2923 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2924 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2925 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2926 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2927
2928 //=============================================================================
2929 bool Matcher::match_rule_supported(int opcode) {
2930 if (!has_match_rule(opcode)) {
2931 return false; // no match rule present
2932 }
2933 switch (opcode) {
2934 case Op_AbsVL:
2935 case Op_StoreVectorScatter:
2936 if (UseAVX < 3) {
2937 return false;
2938 }
2939 break;
2940 case Op_PopCountI:
2941 case Op_PopCountL:
2942 if (!UsePopCountInstruction) {
2943 return false;
2944 }
2945 break;
2946 case Op_PopCountVI:
2947 if (UseAVX < 2) {
2948 return false;
2949 }
2950 break;
2951 case Op_CompressV:
2952 case Op_ExpandV:
2953 case Op_PopCountVL:
2954 if (UseAVX < 2) {
2955 return false;
2956 }
2957 break;
2958 case Op_MulVI:
2959 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
2960 return false;
2961 }
2962 break;
2963 case Op_MulVL:
2964 if (UseSSE < 4) { // only with SSE4_1 or AVX
2965 return false;
2966 }
2967 break;
2968 case Op_MulReductionVL:
if (!VM_Version::supports_avx512dq()) {
2970 return false;
2971 }
2972 break;
2973 case Op_AbsVB:
2974 case Op_AbsVS:
2975 case Op_AbsVI:
2976 case Op_AddReductionVI:
2977 case Op_AndReductionV:
2978 case Op_OrReductionV:
2979 case Op_XorReductionV:
2980 if (UseSSE < 3) { // requires at least SSSE3
2981 return false;
2982 }
2983 break;
2984 case Op_MaxHF:
2985 case Op_MinHF:
2986 if (!VM_Version::supports_avx512vlbw()) {
2987 return false;
2988 } // fallthrough
2989 case Op_AddHF:
2990 case Op_DivHF:
2991 case Op_FmaHF:
2992 case Op_MulHF:
2993 case Op_ReinterpretS2HF:
2994 case Op_ReinterpretHF2S:
2995 case Op_SubHF:
2996 case Op_SqrtHF:
2997 if (!VM_Version::supports_avx512_fp16()) {
2998 return false;
2999 }
3000 break;
3001 case Op_VectorLoadShuffle:
3002 case Op_VectorRearrange:
3003 case Op_MulReductionVI:
3004 if (UseSSE < 4) { // requires at least SSE4
3005 return false;
3006 }
3007 break;
3008 case Op_IsInfiniteF:
3009 case Op_IsInfiniteD:
3010 if (!VM_Version::supports_avx512dq()) {
3011 return false;
3012 }
3013 break;
3014 case Op_SqrtVD:
3015 case Op_SqrtVF:
3016 case Op_VectorMaskCmp:
3017 case Op_VectorCastB2X:
3018 case Op_VectorCastS2X:
3019 case Op_VectorCastI2X:
3020 case Op_VectorCastL2X:
3021 case Op_VectorCastF2X:
3022 case Op_VectorCastD2X:
3023 case Op_VectorUCastB2X:
3024 case Op_VectorUCastS2X:
3025 case Op_VectorUCastI2X:
3026 case Op_VectorMaskCast:
3027 if (UseAVX < 1) { // enabled for AVX only
3028 return false;
3029 }
3030 break;
3031 case Op_PopulateIndex:
3032 if (UseAVX < 2) {
3033 return false;
3034 }
3035 break;
3036 case Op_RoundVF:
3037 if (UseAVX < 2) { // enabled for AVX2 only
3038 return false;
3039 }
3040 break;
3041 case Op_RoundVD:
3042 if (UseAVX < 3) {
3043 return false; // enabled for AVX3 only
3044 }
3045 break;
3046 case Op_CompareAndSwapL:
3047 case Op_CompareAndSwapP:
3048 break;
case Op_StrIndexOf:
case Op_StrIndexOfChar:
if (!UseSSE42Intrinsics) {
return false;
}
break;
3059 case Op_OnSpinWait:
if (!VM_Version::supports_on_spin_wait()) {
3061 return false;
3062 }
3063 break;
3064 case Op_MulVB:
3065 case Op_LShiftVB:
3066 case Op_RShiftVB:
3067 case Op_URShiftVB:
3068 case Op_VectorInsert:
3069 case Op_VectorLoadMask:
3070 case Op_VectorStoreMask:
3071 case Op_VectorBlend:
3072 if (UseSSE < 4) {
3073 return false;
3074 }
3075 break;
3076 case Op_MaxD:
3077 case Op_MaxF:
3078 case Op_MinD:
3079 case Op_MinF:
3080 if (UseAVX < 1) { // enabled for AVX only
3081 return false;
3082 }
3083 break;
3084 case Op_CacheWB:
3085 case Op_CacheWBPreSync:
3086 case Op_CacheWBPostSync:
3087 if (!VM_Version::supports_data_cache_line_flush()) {
3088 return false;
3089 }
3090 break;
3091 case Op_ExtractB:
3092 case Op_ExtractL:
3093 case Op_ExtractI:
3094 case Op_RoundDoubleMode:
3095 if (UseSSE < 4) {
3096 return false;
3097 }
3098 break;
3099 case Op_RoundDoubleModeV:
if (!VM_Version::supports_avx()) {
3101 return false; // 128bit vroundpd is not available
3102 }
3103 break;
3104 case Op_LoadVectorGather:
3105 case Op_LoadVectorGatherMasked:
3106 if (UseAVX < 2) {
3107 return false;
3108 }
3109 break;
3110 case Op_FmaF:
3111 case Op_FmaD:
3112 case Op_FmaVD:
3113 case Op_FmaVF:
3114 if (!UseFMA) {
3115 return false;
3116 }
3117 break;
3118 case Op_MacroLogicV:
3119 if (UseAVX < 3 || !UseVectorMacroLogic) {
3120 return false;
3121 }
3122 break;
3123
3124 case Op_VectorCmpMasked:
3125 case Op_VectorMaskGen:
3126 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3127 return false;
3128 }
3129 break;
3130 case Op_VectorMaskFirstTrue:
3131 case Op_VectorMaskLastTrue:
3132 case Op_VectorMaskTrueCount:
3133 case Op_VectorMaskToLong:
3134 if (UseAVX < 1) {
3135 return false;
3136 }
3137 break;
3138 case Op_RoundF:
3139 case Op_RoundD:
3140 break;
3141 case Op_CopySignD:
3142 case Op_CopySignF:
if (UseAVX < 3 || !VM_Version::supports_avx512vl()) {
return false;
}
3149 break;
3150 case Op_CompressBits:
3151 case Op_ExpandBits:
3152 if (!VM_Version::supports_bmi2()) {
3153 return false;
3154 }
3155 break;
3156 case Op_CompressM:
3157 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3158 return false;
3159 }
3160 break;
3161 case Op_ConvF2HF:
3162 case Op_ConvHF2F:
3163 if (!VM_Version::supports_float16()) {
3164 return false;
3165 }
3166 break;
3167 case Op_VectorCastF2HF:
3168 case Op_VectorCastHF2F:
3169 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3170 return false;
3171 }
3172 break;
3173 }
3174 return true; // Match rules are supported by default.
3175 }
3176
3177 //------------------------------------------------------------------------
3178
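// Note: this mirrors is_vector_popcount_predicate() in the source_hpp block above.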
3179 static inline bool is_pop_count_instr_target(BasicType bt) {
3180 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3181 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3182 }
3183
3184 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3185 return match_rule_supported_vector(opcode, vlen, bt);
3186 }
3187
// Identify extra cases in which we might want to provide match rules for vector nodes
// and other intrinsics guarded with vector length (vlen) and element type (bt).
3190 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3191 if (!match_rule_supported(opcode)) {
3192 return false;
3193 }
3194 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3195 // * SSE2 supports 128bit vectors for all types;
3196 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3197 // * AVX2 supports 256bit vectors for all types;
3198 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3199 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3200 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3201 // And MaxVectorSize is taken into account as well.
3202 if (!vector_size_supported(bt, vlen)) {
3203 return false;
3204 }
3205 // Special cases which require vector length follow:
3206 // * implementation limitations
3207 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3208 // * 128bit vroundpd instruction is present only in AVX1
3209 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3210 switch (opcode) {
3211 case Op_MaxVHF:
3212 case Op_MinVHF:
3213 if (!VM_Version::supports_avx512bw()) {
3214 return false;
} // fallthrough
3216 case Op_AddVHF:
3217 case Op_DivVHF:
3218 case Op_FmaVHF:
3219 case Op_MulVHF:
3220 case Op_SubVHF:
3221 case Op_SqrtVHF:
3222 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3223 return false;
3224 }
3225 if (!VM_Version::supports_avx512_fp16()) {
3226 return false;
3227 }
3228 break;
3229 case Op_AbsVF:
3230 case Op_NegVF:
if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3232 return false; // 512bit vandps and vxorps are not available
3233 }
3234 break;
3235 case Op_AbsVD:
3236 case Op_NegVD:
if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3238 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3239 }
3240 break;
3241 case Op_RotateRightV:
3242 case Op_RotateLeftV:
3243 if (bt != T_INT && bt != T_LONG) {
3244 return false;
3245 } // fallthrough
3246 case Op_MacroLogicV:
3247 if (!VM_Version::supports_evex() ||
3248 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3249 return false;
3250 }
3251 break;
3252 case Op_ClearArray:
3253 case Op_VectorMaskGen:
3254 case Op_VectorCmpMasked:
3255 if (!VM_Version::supports_avx512bw()) {
3256 return false;
3257 }
3258 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3259 return false;
3260 }
3261 break;
3262 case Op_LoadVectorMasked:
3263 case Op_StoreVectorMasked:
3264 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3265 return false;
3266 }
3267 break;
3268 case Op_UMinV:
3269 case Op_UMaxV:
3270 if (UseAVX == 0) {
3271 return false;
3272 }
3273 break;
3274 case Op_MaxV:
3275 case Op_MinV:
3276 if (UseSSE < 4 && is_integral_type(bt)) {
3277 return false;
3278 }
3279 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3280 // Float/Double intrinsics are enabled for AVX family currently.
3281 if (UseAVX == 0) {
3282 return false;
3283 }
3284 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3285 return false;
3286 }
3287 }
3288 break;
3289 case Op_CallLeafVector:
3290 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3291 return false;
3292 }
3293 break;
3294 case Op_AddReductionVI:
3295 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3296 return false;
3297 }
3298 // fallthrough
3299 case Op_AndReductionV:
3300 case Op_OrReductionV:
3301 case Op_XorReductionV:
3302 if (is_subword_type(bt) && (UseSSE < 4)) {
3303 return false;
3304 }
3305 break;
3306 case Op_MinReductionV:
3307 case Op_MaxReductionV:
3308 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3309 return false;
3310 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3311 return false;
3312 }
// Float/Double intrinsics are enabled for AVX family.
3314 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3315 return false;
3316 }
3317 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3318 return false;
3319 }
3320 break;
3321 case Op_VectorTest:
3322 if (UseSSE < 4) {
3323 return false; // Implementation limitation
3324 } else if (size_in_bits < 32) {
3325 return false; // Implementation limitation
3326 }
3327 break;
3328 case Op_VectorLoadShuffle:
3329 case Op_VectorRearrange:
if (vlen == 2) {
3331 return false; // Implementation limitation due to how shuffle is loaded
3332 } else if (size_in_bits == 256 && UseAVX < 2) {
3333 return false; // Implementation limitation
3334 }
3335 break;
3336 case Op_VectorLoadMask:
3337 case Op_VectorMaskCast:
3338 if (size_in_bits == 256 && UseAVX < 2) {
3339 return false; // Implementation limitation
3340 }
3341 // fallthrough
3342 case Op_VectorStoreMask:
3343 if (vlen == 2) {
3344 return false; // Implementation limitation
3345 }
3346 break;
3347 case Op_PopulateIndex:
3348 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3349 return false;
3350 }
3351 break;
3352 case Op_VectorCastB2X:
3353 case Op_VectorCastS2X:
3354 case Op_VectorCastI2X:
3355 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3356 return false;
3357 }
3358 break;
3359 case Op_VectorCastL2X:
3360 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3361 return false;
3362 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3363 return false;
3364 }
3365 break;
case Op_VectorCastF2X: {
// As per JLS section 5.1.3, narrowing conversions to sub-word types
// happen after an intermediate conversion to integer, and the special
// handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3370 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3371 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3372 return false;
3373 }
3374 }
3375 // fallthrough
3376 case Op_VectorCastD2X:
3377 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3378 return false;
3379 }
3380 break;
3381 case Op_VectorCastF2HF:
3382 case Op_VectorCastHF2F:
3383 if (!VM_Version::supports_f16c() &&
3384 ((!VM_Version::supports_evex() ||
3385 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3386 return false;
3387 }
3388 break;
3389 case Op_RoundVD:
3390 if (!VM_Version::supports_avx512dq()) {
3391 return false;
3392 }
3393 break;
3394 case Op_MulReductionVI:
3395 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3396 return false;
3397 }
3398 break;
3399 case Op_LoadVectorGatherMasked:
3400 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3401 return false;
3402 }
3403 if (is_subword_type(bt) &&
3404 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3405 (size_in_bits < 64) ||
3406 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3407 return false;
3408 }
3409 break;
3410 case Op_StoreVectorScatterMasked:
3411 case Op_StoreVectorScatter:
3412 if (is_subword_type(bt)) {
3413 return false;
3414 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3415 return false;
3416 }
3417 // fallthrough
3418 case Op_LoadVectorGather:
3419 if (!is_subword_type(bt) && size_in_bits == 64) {
3420 return false;
3421 }
3422 if (is_subword_type(bt) && size_in_bits < 64) {
3423 return false;
3424 }
3425 break;
3426 case Op_SaturatingAddV:
3427 case Op_SaturatingSubV:
3428 if (UseAVX < 1) {
3429 return false; // Implementation limitation
3430 }
3431 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3432 return false;
3433 }
3434 break;
3435 case Op_SelectFromTwoVector:
3436 if (size_in_bits < 128) {
3437 return false;
3438 }
3439 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3440 return false;
3441 }
3442 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3443 return false;
3444 }
3445 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3446 return false;
3447 }
3448 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3449 return false;
3450 }
3451 break;
3452 case Op_MaskAll:
3453 if (!VM_Version::supports_evex()) {
3454 return false;
3455 }
3456 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3457 return false;
3458 }
3459 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3460 return false;
3461 }
3462 break;
3463 case Op_VectorMaskCmp:
3464 if (vlen < 2 || size_in_bits < 32) {
3465 return false;
3466 }
3467 break;
3468 case Op_CompressM:
3469 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3470 return false;
3471 }
3472 break;
3473 case Op_CompressV:
3474 case Op_ExpandV:
3475 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3476 return false;
3477 }
if (size_in_bits < 128) {
return false;
} // fallthrough
3481 case Op_VectorLongToMask:
3482 if (UseAVX < 1) {
3483 return false;
3484 }
3485 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3486 return false;
3487 }
3488 break;
3489 case Op_SignumVD:
3490 case Op_SignumVF:
3491 if (UseAVX < 1) {
3492 return false;
3493 }
3494 break;
3495 case Op_PopCountVI:
3496 case Op_PopCountVL: {
3497 if (!is_pop_count_instr_target(bt) &&
3498 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3499 return false;
3500 }
3501 }
3502 break;
3503 case Op_ReverseV:
3504 case Op_ReverseBytesV:
3505 if (UseAVX < 2) {
3506 return false;
3507 }
3508 break;
3509 case Op_CountTrailingZerosV:
3510 case Op_CountLeadingZerosV:
3511 if (UseAVX < 2) {
3512 return false;
3513 }
3514 break;
3515 }
return true; // Match rules are supported by default.
3517 }
3518
3519 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// The ADLC-based match_rule_supported routine checks for the existence of a pattern
// based on the IR opcode. Most of the unary/binary/ternary masked operations share
// the IR nodes of their non-masked counterparts, with the mask edge being the
// differentiator. This routine does a strict check on the existence of masked
// operation patterns by returning a default false value for all opcodes apart from
// the ones whose masked instruction patterns are defined in this file.
3526 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3527 return false;
3528 }
3529
3530 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3531 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3532 return false;
3533 }
switch (opcode) {
3535 // Unary masked operations
3536 case Op_AbsVB:
3537 case Op_AbsVS:
if (!VM_Version::supports_avx512bw()) {
return false; // Implementation limitation
} // fallthrough
3541 case Op_AbsVI:
3542 case Op_AbsVL:
3543 return true;
3544
3545 // Ternary masked operations
3546 case Op_FmaVF:
3547 case Op_FmaVD:
3548 return true;
3549
3550 case Op_MacroLogicV:
if (bt != T_INT && bt != T_LONG) {
3552 return false;
3553 }
3554 return true;
3555
3556 // Binary masked operations
3557 case Op_AddVB:
3558 case Op_AddVS:
3559 case Op_SubVB:
3560 case Op_SubVS:
3561 case Op_MulVS:
3562 case Op_LShiftVS:
3563 case Op_RShiftVS:
3564 case Op_URShiftVS:
3565 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3566 if (!VM_Version::supports_avx512bw()) {
3567 return false; // Implementation limitation
3568 }
3569 return true;
3570
3571 case Op_MulVL:
3572 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3573 if (!VM_Version::supports_avx512dq()) {
3574 return false; // Implementation limitation
3575 }
3576 return true;
3577
3578 case Op_AndV:
3579 case Op_OrV:
3580 case Op_XorV:
3581 case Op_RotateRightV:
3582 case Op_RotateLeftV:
3583 if (bt != T_INT && bt != T_LONG) {
3584 return false; // Implementation limitation
3585 }
3586 return true;
3587
3588 case Op_VectorLoadMask:
3589 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3590 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3591 return false;
3592 }
3593 return true;
3594
3595 case Op_AddVI:
3596 case Op_AddVL:
3597 case Op_AddVF:
3598 case Op_AddVD:
3599 case Op_SubVI:
3600 case Op_SubVL:
3601 case Op_SubVF:
3602 case Op_SubVD:
3603 case Op_MulVI:
3604 case Op_MulVF:
3605 case Op_MulVD:
3606 case Op_DivVF:
3607 case Op_DivVD:
3608 case Op_SqrtVF:
3609 case Op_SqrtVD:
3610 case Op_LShiftVI:
3611 case Op_LShiftVL:
3612 case Op_RShiftVI:
3613 case Op_RShiftVL:
3614 case Op_URShiftVI:
3615 case Op_URShiftVL:
3616 case Op_LoadVectorMasked:
3617 case Op_StoreVectorMasked:
3618 case Op_LoadVectorGatherMasked:
3619 case Op_StoreVectorScatterMasked:
3620 return true;
3621
3622 case Op_UMinV:
3623 case Op_UMaxV:
3624 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3625 return false;
3626 } // fallthrough
3627 case Op_MaxV:
3628 case Op_MinV:
3629 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3630 return false; // Implementation limitation
3631 }
3632 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3633 return false; // Implementation limitation
3634 }
3635 return true;
3636 case Op_SaturatingAddV:
3637 case Op_SaturatingSubV:
3638 if (!is_subword_type(bt)) {
3639 return false;
3640 }
3641 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3642 return false; // Implementation limitation
3643 }
3644 return true;
3645
3646 case Op_VectorMaskCmp:
3647 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3648 return false; // Implementation limitation
3649 }
3650 return true;
3651
3652 case Op_VectorRearrange:
3653 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3654 return false; // Implementation limitation
3655 }
3656 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3657 return false; // Implementation limitation
3658 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3659 return false; // Implementation limitation
3660 }
3661 return true;
3662
3663 // Binary Logical operations
3664 case Op_AndVMask:
3665 case Op_OrVMask:
3666 case Op_XorVMask:
3667 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3668 return false; // Implementation limitation
3669 }
3670 return true;
3671
3672 case Op_PopCountVI:
3673 case Op_PopCountVL:
3674 if (!is_pop_count_instr_target(bt)) {
3675 return false;
3676 }
3677 return true;
3678
3679 case Op_MaskAll:
3680 return true;
3681
3682 case Op_CountLeadingZerosV:
3683 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3684 return true;
3685 }
3686 default:
3687 return false;
3688 }
3689 }
3690
3691 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3692 return false;
3693 }
3694
3695 // Return true if Vector::rearrange needs preparation of the shuffle argument
3696 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3697 switch (elem_bt) {
3698 case T_BYTE: return false;
3699 case T_SHORT: return !VM_Version::supports_avx512bw();
3700 case T_INT: return !VM_Version::supports_avx();
3701 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3702 default:
3703 ShouldNotReachHere();
3704 return false;
3705 }
3706 }
3707
3708 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3709 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3710 bool legacy = (generic_opnd->opcode() == LEGVEC);
3711 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3712 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3713 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3714 return new legVecZOper();
3715 }
3716 if (legacy) {
3717 switch (ideal_reg) {
3718 case Op_VecS: return new legVecSOper();
3719 case Op_VecD: return new legVecDOper();
3720 case Op_VecX: return new legVecXOper();
3721 case Op_VecY: return new legVecYOper();
3722 case Op_VecZ: return new legVecZOper();
3723 }
3724 } else {
3725 switch (ideal_reg) {
3726 case Op_VecS: return new vecSOper();
3727 case Op_VecD: return new vecDOper();
3728 case Op_VecX: return new vecXOper();
3729 case Op_VecY: return new vecYOper();
3730 case Op_VecZ: return new vecZOper();
3731 }
3732 }
3733 ShouldNotReachHere();
3734 return nullptr;
3735 }
3736
3737 bool Matcher::is_reg2reg_move(MachNode* m) {
3738 switch (m->rule()) {
3739 case MoveVec2Leg_rule:
3740 case MoveLeg2Vec_rule:
3741 case MoveF2VL_rule:
3742 case MoveF2LEG_rule:
3743 case MoveVL2F_rule:
3744 case MoveLEG2F_rule:
3745 case MoveD2VL_rule:
3746 case MoveD2LEG_rule:
3747 case MoveVL2D_rule:
3748 case MoveLEG2D_rule:
3749 return true;
3750 default:
3751 return false;
3752 }
3753 }
3754
3755 bool Matcher::is_generic_vector(MachOper* opnd) {
3756 switch (opnd->opcode()) {
3757 case VEC:
3758 case LEGVEC:
3759 return true;
3760 default:
3761 return false;
3762 }
3763 }
3764
3765 //------------------------------------------------------------------------
3766
3767 const RegMask* Matcher::predicate_reg_mask(void) {
3768 return &_VECTMASK_REG_mask;
3769 }
3770
3771 // Max vector size in bytes. 0 if not supported.
3772 int Matcher::vector_width_in_bytes(BasicType bt) {
3773 assert(is_java_primitive(bt), "only primitive type vectors");
3774 // SSE2 supports 128bit vectors for all types.
3775 // AVX2 supports 256bit vectors for all types.
// EVEX (AVX-512) supports 512bit vectors for all types.
3777 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3778 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3779 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3780 size = (UseAVX > 2) ? 64 : 32;
3781 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3782 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3783 // Use flag to limit vector size.
3784 size = MIN2(size,(int)MaxVectorSize);
3785 // Minimum 2 values in vector (or 4 for bytes).
3786 switch (bt) {
case T_DOUBLE:
case T_LONG:
if (size < 16) return 0;
break;
case T_FLOAT:
case T_INT:
if (size < 8) return 0;
break;
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
if (size < 4) return 0;
break;
3807 default:
3808 ShouldNotReachHere();
3809 }
3810 return size;
3811 }
3812
3813 // Limits on vector size (number of elements) loaded into vector.
3814 int Matcher::max_vector_size(const BasicType bt) {
3815 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3816 }
3817 int Matcher::min_vector_size(const BasicType bt) {
3818 int max_size = max_vector_size(bt);
// The minimum size that can be loaded into a vector is 4 bytes.
int size = (type2aelembytes(bt) == 1) ? 4 : 2;
// Support for calling SVML double64 vector routines.
3822 if (bt == T_DOUBLE) {
3823 size = 1;
3824 }
3825 return MIN2(size,max_size);
3826 }
3827
3828 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3829 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3830 // by default on Cascade Lake
3831 if (VM_Version::is_default_intel_cascade_lake()) {
3832 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3833 }
3834 return Matcher::max_vector_size(bt);
3835 }
3836
3837 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3838 return -1;
3839 }
3840
3841 // Vector ideal reg corresponding to specified size in bytes
3842 uint Matcher::vector_ideal_reg(int size) {
3843 assert(MaxVectorSize >= size, "");
switch (size) {
3845 case 4: return Op_VecS;
3846 case 8: return Op_VecD;
3847 case 16: return Op_VecX;
3848 case 32: return Op_VecY;
3849 case 64: return Op_VecZ;
3850 }
3851 ShouldNotReachHere();
3852 return 0;
3853 }
3854
3855 // Check for shift by small constant as well
3856 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3857 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3858 shift->in(2)->get_int() <= 3 &&
3859 // Are there other uses besides address expressions?
3860 !matcher->is_visited(shift)) {
3861 address_visited.set(shift->_idx); // Flag as address_visited
3862 mstack.push(shift->in(2), Matcher::Visit);
3863 Node *conv = shift->in(1);
// Allow the Matcher to match the rule which bypasses
// the ConvI2L operation for an array index on LP64
// if the index value is positive.
3867 if (conv->Opcode() == Op_ConvI2L &&
3868 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3869 // Are there other uses besides address expressions?
3870 !matcher->is_visited(conv)) {
3871 address_visited.set(conv->_idx); // Flag as address_visited
3872 mstack.push(conv->in(1), Matcher::Pre_Visit);
3873 } else {
3874 mstack.push(conv, Matcher::Pre_Visit);
3875 }
3876 return true;
3877 }
3878 return false;
3879 }
3880
3881 // This function identifies sub-graphs in which a 'load' node is
3882 // input to two different nodes, and such that it can be matched
3883 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to 'blsi r32, m32'.
3885 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3886 // refers to the same node.
3887 //
3888 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3889 // This is a temporary solution until we make DAGs expressible in ADL.
3890 template<typename ConType>
3891 class FusedPatternMatcher {
3892 Node* _op1_node;
3893 Node* _mop_node;
3894 int _con_op;
3895
3896 static int match_next(Node* n, int next_op, int next_op_idx) {
3897 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3898 return -1;
3899 }
3900
3901 if (next_op_idx == -1) { // n is commutative, try rotations
3902 if (n->in(1)->Opcode() == next_op) {
3903 return 1;
3904 } else if (n->in(2)->Opcode() == next_op) {
3905 return 2;
3906 }
3907 } else {
3908 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3909 if (n->in(next_op_idx)->Opcode() == next_op) {
3910 return next_op_idx;
3911 }
3912 }
3913 return -1;
3914 }
3915
3916 public:
3917 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3918 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3919
3920 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3921 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3922 typename ConType::NativeType con_value) {
3923 if (_op1_node->Opcode() != op1) {
3924 return false;
3925 }
3926 if (_mop_node->outcnt() > 2) {
3927 return false;
3928 }
3929 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3930 if (op1_op2_idx == -1) {
3931 return false;
3932 }
3933 // Memory operation must be the other edge
3934 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3935
3936 // Check that the mop node is really what we want
3937 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3938 Node* op2_node = _op1_node->in(op1_op2_idx);
3939 if (op2_node->outcnt() > 1) {
3940 return false;
3941 }
3942 assert(op2_node->Opcode() == op2, "Should be");
3943 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3944 if (op2_con_idx == -1) {
3945 return false;
3946 }
3947 // Memory operation must be the other edge
3948 int op2_mop_idx = (op2_con_idx & 1) + 1;
3949 // Check that the memory operation is the same node
3950 if (op2_node->in(op2_mop_idx) == _mop_node) {
3951 // Now check the constant
3952 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
3953 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
3954 return true;
3955 }
3956 }
3957 }
3958 return false;
3959 }
3960 };
3961
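// The matched shapes correspond to the BMI1 instructions:
// blsi (x & -x): And of (Sub 0 load) with the same load;
// blsr (x & (x - 1)): And with (Add load -1);
// blsmsk (x ^ (x - 1)): Xor with (Add load -1).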
3962 static bool is_bmi_pattern(Node* n, Node* m) {
3963 assert(UseBMI1Instructions, "sanity");
3964 if (n != nullptr && m != nullptr) {
3965 if (m->Opcode() == Op_LoadI) {
3966 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
3967 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
3968 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
3969 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
3970 } else if (m->Opcode() == Op_LoadL) {
3971 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
3972 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
3973 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
3974 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
3975 }
3976 }
3977 return false;
3978 }
3979
3980 // Should the matcher clone input 'm' of node 'n'?
3981 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
3982 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
3983 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
3984 mstack.push(m, Visit);
3985 return true;
3986 }
3987 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
3988 mstack.push(m, Visit); // m = ShiftCntV
3989 return true;
3990 }
3991 if (is_encode_and_store_pattern(n, m)) {
3992 mstack.push(m, Visit);
3993 return true;
3994 }
3995 return false;
3996 }
3997
3998 // Should the Matcher clone shifts on addressing modes, expecting them
3999 // to be subsumed into complex addressing expressions or compute them
4000 // into registers?
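// For example, in base + (idx << 2) + disp the shift and the constant offset
// both fold into a single x86 [base + index*scale + disp32] addressing mode,
// so they are cloned to each use instead of being computed into registers.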
4001 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4002 Node *off = m->in(AddPNode::Offset);
4003 if (off->is_Con()) {
4004 address_visited.test_set(m->_idx); // Flag as address_visited
4005 Node *adr = m->in(AddPNode::Address);
4006
4007 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4008 // AtomicAdd is not an addressing expression.
4009 // Cheap to find it by looking for screwy base.
4010 if (adr->is_AddP() &&
4011 !adr->in(AddPNode::Base)->is_top() &&
4012 !adr->in(AddPNode::Offset)->is_Con() &&
4013 off->get_long() == (int) (off->get_long()) && // immL32
4014 // Are there other uses besides address expressions?
4015 !is_visited(adr)) {
4016 address_visited.set(adr->_idx); // Flag as address_visited
4017 Node *shift = adr->in(AddPNode::Offset);
4018 if (!clone_shift(shift, this, mstack, address_visited)) {
4019 mstack.push(shift, Pre_Visit);
4020 }
4021 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4022 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4023 } else {
4024 mstack.push(adr, Pre_Visit);
4025 }
4026
4027 // Clone X+offset as it also folds into most addressing expressions
4028 mstack.push(off, Visit);
4029 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4030 return true;
4031 } else if (clone_shift(off, this, mstack, address_visited)) {
4032 address_visited.test_set(m->_idx); // Flag as address_visited
4033 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4034 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4035 return true;
4036 }
4037 return false;
4038 }
4039
4040 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4041 switch (bt) {
4042 case BoolTest::eq:
4043 return Assembler::eq;
4044 case BoolTest::ne:
4045 return Assembler::neq;
4046 case BoolTest::le:
4047 case BoolTest::ule:
4048 return Assembler::le;
4049 case BoolTest::ge:
4050 case BoolTest::uge:
4051 return Assembler::nlt;
4052 case BoolTest::lt:
4053 case BoolTest::ult:
4054 return Assembler::lt;
4055 case BoolTest::gt:
4056 case BoolTest::ugt:
4057 return Assembler::nle;
default: ShouldNotReachHere(); return Assembler::_false;
4059 }
4060 }
4061
4062 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4063 switch (bt) {
4064 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4065 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4066 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4067 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4068 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4069 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4070 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4071 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4072 }
4073 }
4074
4075 // Helper methods for MachSpillCopyNode::implementation().
4076 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4077 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4078 assert(ireg == Op_VecS || // 32bit vector
4079 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4080 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4081 "no non-adjacent vector moves" );
4082 if (masm) {
4083 switch (ireg) {
4084 case Op_VecS: // copy whole register
4085 case Op_VecD:
4086 case Op_VecX:
4087 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4088 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4089 } else {
4090 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4091 }
4092 break;
4093 case Op_VecY:
4094 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4095 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4096 } else {
4097 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4098 }
4099 break;
4100 case Op_VecZ:
4101 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4102 break;
4103 default:
4104 ShouldNotReachHere();
4105 }
4106 #ifndef PRODUCT
4107 } else {
4108 switch (ireg) {
4109 case Op_VecS:
4110 case Op_VecD:
4111 case Op_VecX:
4112 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4113 break;
4114 case Op_VecY:
4115 case Op_VecZ:
4116 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4117 break;
4118 default:
4119 ShouldNotReachHere();
4120 }
4121 #endif
4122 }
4123 }
4124
4125 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4126 int stack_offset, int reg, uint ireg, outputStream* st) {
4127 if (masm) {
4128 if (is_load) {
4129 switch (ireg) {
4130 case Op_VecS:
4131 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4132 break;
4133 case Op_VecD:
4134 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4135 break;
4136 case Op_VecX:
4137 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4138 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4139 } else {
4140 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
__ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4142 }
4143 break;
4144 case Op_VecY:
4145 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4146 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4147 } else {
4148 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
__ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4150 }
4151 break;
4152 case Op_VecZ:
4153 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4154 break;
4155 default:
4156 ShouldNotReachHere();
4157 }
4158 } else { // store
4159 switch (ireg) {
4160 case Op_VecS:
4161 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4162 break;
4163 case Op_VecD:
4164 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4165 break;
4166 case Op_VecX:
4167 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4168 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
} else {
4171 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4172 }
4173 break;
4174 case Op_VecY:
4175 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4176 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
} else {
4179 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4180 }
4181 break;
4182 case Op_VecZ:
4183 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4184 break;
4185 default:
4186 ShouldNotReachHere();
4187 }
4188 }
4189 #ifndef PRODUCT
4190 } else {
4191 if (is_load) {
4192 switch (ireg) {
4193 case Op_VecS:
4194 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4195 break;
4196 case Op_VecD:
4197 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4198 break;
4199 case Op_VecX:
4200 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4201 break;
4202 case Op_VecY:
4203 case Op_VecZ:
4204 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4205 break;
4206 default:
4207 ShouldNotReachHere();
4208 }
4209 } else { // store
4210 switch (ireg) {
4211 case Op_VecS:
4212 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4213 break;
4214 case Op_VecD:
4215 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4216 break;
4217 case Op_VecX:
4218 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4219 break;
4220 case Op_VecY:
4221 case Op_VecZ:
4222 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4223 break;
4224 default:
4225 ShouldNotReachHere();
4226 }
4227 }
4228 #endif
4229 }
4230 }
4231
4232 template <class T>
4233 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4234 int size = type2aelembytes(bt) * len;
4235 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4236 for (int i = 0; i < len; i++) {
4237 int offset = i * type2aelembytes(bt);
4238 switch (bt) {
4239 case T_BYTE: val->at(i) = con; break;
4240 case T_SHORT: {
4241 jshort c = con;
4242 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4243 break;
4244 }
4245 case T_INT: {
4246 jint c = con;
4247 memcpy(val->adr_at(offset), &c, sizeof(jint));
4248 break;
4249 }
4250 case T_LONG: {
4251 jlong c = con;
4252 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4253 break;
4254 }
4255 case T_FLOAT: {
4256 jfloat c = con;
4257 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4258 break;
4259 }
4260 case T_DOUBLE: {
4261 jdouble c = con;
4262 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4263 break;
4264 }
4265 default: assert(false, "%s", type2name(bt));
4266 }
4267 }
4268 return val;
4269 }
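// Worked example: vreplicate_imm(T_INT, 1, 4) returns a 16-byte array holding
// the jint value 1 at offsets 0, 4, 8, and 12.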
4270
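// Returns a 64-bit pattern with the sign (high) bit of every lane set,
// e.g. 0x80 repeated in each byte lane for T_BYTE.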
4271 static inline jlong high_bit_set(BasicType bt) {
4272 switch (bt) {
4273 case T_BYTE: return 0x8080808080808080;
4274 case T_SHORT: return 0x8000800080008000;
4275 case T_INT: return 0x8000000080000000;
4276 case T_LONG: return 0x8000000000000000;
4277 default:
4278 ShouldNotReachHere();
4279 return 0;
4280 }
4281 }
4282
4283 #ifndef PRODUCT
4284 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4285 st->print("nop \t# %d bytes pad for loops and calls", _count);
4286 }
4287 #endif
4288
4289 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4290 __ nop(_count);
4291 }
4292
4293 uint MachNopNode::size(PhaseRegAlloc*) const {
4294 return _count;
4295 }
4296
4297 #ifndef PRODUCT
4298 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4299 st->print("# breakpoint");
4300 }
4301 #endif
4302
4303 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4304 __ int3();
4305 }
4306
4307 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4308 return MachNode::size(ra_);
4309 }
4310
4311 %}
4312
4313 //----------ENCODING BLOCK-----------------------------------------------------
4314 // This block specifies the encoding classes used by the compiler to
4315 // output byte streams. Encoding classes are parameterized macros
4316 // used by Machine Instruction Nodes in order to generate the bit
4317 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4321 // which returns its register number when queried. CONST_INTER causes
4322 // an operand to generate a function which returns the value of the
4323 // constant when queried. MEMORY_INTER causes an operand to generate
4324 // four functions which return the Base Register, the Index Register,
4325 // the Scale Value, and the Offset Value of the operand when queried.
4326 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4328 // associated with each basic boolean condition for a conditional
4329 // instruction.
4330 //
4331 // Instructions specify two basic values for encoding. Again, a
4332 // function is available to check if the constant displacement is an
4333 // oop. They use the ins_encode keyword to specify their encoding
4334 // classes (which must be a sequence of enc_class names, and their
4335 // parameters, specified in the encoding block), and they use the
4336 // opcode keyword to specify, in order, their primary, secondary, and
4337 // tertiary opcode. Only the opcode sections which a particular
4338 // instruction needs for encoding need to be specified.
4339 encode %{
4340 enc_class cdql_enc(no_rax_rdx_RegI div)
4341 %{
// Full implementation of Java idiv and irem; checks for the
// special case as described in the JVM spec., p.243 & p.271.
4344 //
4345 // normal case special case
4346 //
4347 // input : rax: dividend min_int
4348 // reg: divisor -1
4349 //
4350 // output: rax: quotient (= rax idiv reg) min_int
4351 // rdx: remainder (= rax irem reg) 0
4352 //
// Code sequence:
4354 //
4355 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4356 // 5: 75 07/08 jne e <normal>
4357 // 7: 33 d2 xor %edx,%edx
4358 // [div >= 8 -> offset + 1]
4359 // [REX_B]
4360 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4361 // c: 74 03/04 je 11 <done>
4362 // 000000000000000e <normal>:
4363 // e: 99 cltd
4364 // [div >= 8 -> offset + 1]
4365 // [REX_B]
4366 // f: f7 f9 idiv $div
4367 // 0000000000000011 <done>:
4368 Label normal;
4369 Label done;
4370
4371 // cmp $0x80000000,%eax
4372 __ cmpl(as_Register(RAX_enc), 0x80000000);
4373
4374 // jne e <normal>
4375 __ jccb(Assembler::notEqual, normal);
4376
4377 // xor %edx,%edx
4378 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4379
// cmp $0xffffffffffffffff,$div
4381 __ cmpl($div$$Register, -1);
4382
4383 // je 11 <done>
4384 __ jccb(Assembler::equal, done);
4385
4386 // <normal>
4387 // cltd
4388 __ bind(normal);
4389 __ cdql();
4390
4391 // idivl
4392 // <done>
4393 __ idivl($div$$Register);
4394 __ bind(done);
4395 %}
4396
4397 enc_class cdqq_enc(no_rax_rdx_RegL div)
4398 %{
// Full implementation of Java ldiv and lrem; checks for the
// special case as described in the JVM spec., p.243 & p.271.
4401 //
4402 // normal case special case
4403 //
4404 // input : rax: dividend min_long
4405 // reg: divisor -1
4406 //
4407 // output: rax: quotient (= rax idiv reg) min_long
4408 // rdx: remainder (= rax irem reg) 0
4409 //
// Code sequence:
4411 //
4412 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4413 // 7: 00 00 80
4414 // a: 48 39 d0 cmp %rdx,%rax
4415 // d: 75 08 jne 17 <normal>
4416 // f: 33 d2 xor %edx,%edx
4417 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4418 // 15: 74 05 je 1c <done>
4419 // 0000000000000017 <normal>:
4420 // 17: 48 99 cqto
4421 // 19: 48 f7 f9 idiv $div
4422 // 000000000000001c <done>:
4423 Label normal;
4424 Label done;
4425
4426 // mov $0x8000000000000000,%rdx
4427 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4428
4429 // cmp %rdx,%rax
4430 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4431
4432 // jne 17 <normal>
4433 __ jccb(Assembler::notEqual, normal);
4434
4435 // xor %edx,%edx
4436 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4437
4438 // cmp $0xffffffffffffffff,$div
4439 __ cmpq($div$$Register, -1);
4440
// je 1c <done>
4442 __ jccb(Assembler::equal, done);
4443
4444 // <normal>
4445 // cqto
4446 __ bind(normal);
4447 __ cdqq();
4448
// idivq
4450 // <done>
4451 __ idivq($div$$Register);
4452 __ bind(done);
4453 %}
4454
4455 enc_class clear_avx %{
4456 DEBUG_ONLY(int off0 = __ offset());
4457 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when current compiled code uses
// wide vectors to avoid AVX <-> SSE transition penalty during call.
4461 __ vzeroupper();
4462 }
4463 DEBUG_ONLY(int off1 = __ offset());
4464 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4465 %}
4466
4467 enc_class Java_To_Runtime(method meth) %{
4468 __ lea(r10, RuntimeAddress((address)$meth$$method));
4469 __ call(r10);
4470 __ post_call_nop();
4471 %}
4472
4473 enc_class Java_Static_Call(method meth)
4474 %{
4475 // JAVA STATIC CALL
4476 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4477 // determine who we intended to call.
4478 if (!_method) {
4479 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4480 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4481 // The NOP here is purely to ensure that eliding a call to
4482 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4483 __ addr_nop_5();
4484 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4485 } else {
4486 int method_index = resolved_method_index(masm);
4487 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4488 : static_call_Relocation::spec(method_index);
4489 address mark = __ pc();
4490 int call_offset = __ offset();
4491 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4492 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4493 // Calls of the same statically bound method can share
4494 // a stub to the interpreter.
4495 __ code()->shared_stub_to_interp_for(_method, call_offset);
4496 } else {
4497 // Emit stubs for static call.
4498 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4499 __ clear_inst_mark();
4500 if (stub == nullptr) {
4501 ciEnv::current()->record_failure("CodeCache is full");
4502 return;
4503 }
4504 }
4505 }
4506 __ post_call_nop();
4507 %}
4508
4509 enc_class Java_Dynamic_Call(method meth) %{
4510 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4511 __ post_call_nop();
4512 %}
4513
4514 enc_class call_epilog %{
4515 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find magic cookie on stack
4517 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4518 Label L;
4519 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4520 __ jccb(Assembler::equal, L);
4521 // Die if stack mismatch
4522 __ int3();
4523 __ bind(L);
4524 }
4525 %}
4526
4527 %}
4528
4529 //----------FRAME--------------------------------------------------------------
4530 // Definition of frame structure and management information.
4531 //
4532 // S T A C K L A Y O U T Allocators stack-slot number
4533 // | (to get allocators register number
4534 // G Owned by | | v add OptoReg::stack0())
4535 // r CALLER | |
4536 // o | +--------+ pad to even-align allocators stack-slot
4537 // w V | pad0 | numbers; owned by CALLER
4538 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4539 // h ^ | in | 5
4540 // | | args | 4 Holes in incoming args owned by SELF
4541 // | | | | 3
4542 // | | +--------+
4543 // V | | old out| Empty on Intel, window on Sparc
4544 // | old |preserve| Must be even aligned.
4545 // | SP-+--------+----> Matcher::_old_SP, even aligned
4546 // | | in | 3 area for Intel ret address
4547 // Owned by |preserve| Empty on Sparc.
4548 // SELF +--------+
4549 // | | pad2 | 2 pad to align old SP
4550 // | +--------+ 1
4551 // | | locks | 0
4552 // | +--------+----> OptoReg::stack0(), even aligned
4553 // | | pad1 | 11 pad to align new SP
4554 // | +--------+
4555 // | | | 10
4556 // | | spills | 9 spills
4557 // V | | 8 (pad0 slot for callee)
4558 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4559 // ^ | out | 7
4560 // | | args | 6 Holes in outgoing args owned by CALLEE
4561 // Owned by +--------+
4562 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4563 // | new |preserve| Must be even-aligned.
4564 // | SP-+--------+----> Matcher::_new_SP, even aligned
4565 // | | |
4566 //
4567 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4568 // known from SELF's arguments and the Java calling convention.
4569 // Region 6-7 is determined per call site.
4570 // Note 2: If the calling convention leaves holes in the incoming argument
4571 // area, those holes are owned by SELF. Holes in the outgoing area
4572 // are owned by the CALLEE. Holes should not be necessary in the
4573 // incoming area, as the Java calling convention is completely under
4574 // the control of the AD file. Doubles can be sorted and packed to
4575 // avoid holes. Holes in the outgoing arguments may be necessary for
4576 // varargs C calling conventions.
4577 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4578 // even aligned with pad0 as needed.
4579 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4580 // region 6-11 is even aligned; it may be padded out more so that
4581 // the region from SP to FP meets the minimum stack alignment.
4582 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4583 // alignment. Region 11, pad1, may be dynamically extended so that
4584 // SP meets the minimum alignment.
4585
4586 frame
4587 %{
4588 // These three registers define part of the calling convention
4589 // between compiled code and the interpreter.
4590 inline_cache_reg(RAX); // Inline Cache Register
4591
4592 // Optional: name the operand used by cisc-spilling to access
4593 // [stack_pointer + offset]
4594 cisc_spilling_operand_name(indOffset32);
4595
4596 // Number of stack slots consumed by locking an object
4597 sync_stack_slots(2);
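// (Two 32-bit slots hold the one 64-bit BasicLock word needed per lock.)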
4598
4599 // Compiled code's Frame Pointer
4600 frame_pointer(RSP);
4601
4602 // Interpreter stores its frame pointer in a register which is
4603 // stored to the stack by I2CAdaptors.
4604 // I2CAdaptors convert from interpreted java to compiled java.
4605 interpreter_frame_pointer(RBP);
4606
4607 // Stack alignment requirement
4608 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4609
4610 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4611 // for calls to C. Supports the var-args backing area for register parms.
4612 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
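// (On Win64 this is the 32-byte register-parameter shadow area required
// by the calling convention; on System V targets it is 0.)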
4613
4614 // The after-PROLOG location of the return address. Location of
4615 // return address specifies a type (REG or STACK) and a number
// representing the register number (i.e., use a register name) or
4617 // stack slot.
4618 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
// Otherwise, it is above the locks and verification slot and alignment word.
4620 return_addr(STACK - 2 +
4621 align_up((Compile::current()->in_preserve_stack_slots() +
4622 Compile::current()->fixed_slots()),
4623 stack_alignment_in_slots()));
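// For example (illustrative numbers only): with 16-byte stack alignment
// (4 32-bit slots) and in_preserve_stack_slots() + fixed_slots() == 6,
// align_up(6, 4) == 8, so the return address would sit 6 slots above
// OptoReg::stack0() (STACK - 2 + 8).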
4624
4625 // Location of compiled Java return values. Same as C for now.
4626 return_value
4627 %{
4628 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4629 "only return normal values");
4630
4631 static const int lo[Op_RegL + 1] = {
4632 0,
4633 0,
4634 RAX_num, // Op_RegN
4635 RAX_num, // Op_RegI
4636 RAX_num, // Op_RegP
4637 XMM0_num, // Op_RegF
4638 XMM0_num, // Op_RegD
4639 RAX_num // Op_RegL
4640 };
4641 static const int hi[Op_RegL + 1] = {
4642 0,
4643 0,
4644 OptoReg::Bad, // Op_RegN
4645 OptoReg::Bad, // Op_RegI
4646 RAX_H_num, // Op_RegP
4647 OptoReg::Bad, // Op_RegF
4648 XMM0b_num, // Op_RegD
4649 RAX_H_num // Op_RegL
4650 };
4651 // Excluded flags and vector registers.
4652 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4653 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4654 %}
4655 %}
4656
4657 //----------ATTRIBUTES---------------------------------------------------------
4658 //----------Operand Attributes-------------------------------------------------
4659 op_attrib op_cost(0); // Required cost attribute
4660
4661 //----------Instruction Attributes---------------------------------------------
4662 ins_attrib ins_cost(100); // Required cost attribute
4663 ins_attrib ins_size(8); // Required size attribute (in bits)
4664 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4665 // a non-matching short branch variant
4666 // of some long branch?
4667 ins_attrib ins_alignment(1); // Required alignment attribute (must
4668 // be a power of 2) specifies the
4669 // alignment that some part of the
4670 // instruction (not necessarily the
4671 // start) requires. If > 1, a
4672 // compute_padding() function must be
4673 // provided for the instruction
4674
4675 // Whether this node is expanded during code emission into a sequence of
4676 // instructions and the first instruction can perform an implicit null check.
4677 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4678
4679 //----------OPERANDS-----------------------------------------------------------
4680 // Operand definitions must precede instruction definitions for correct parsing
4681 // in the ADLC because operands constitute user defined types which are used in
4682 // instruction definitions.
4683
4684 //----------Simple Operands----------------------------------------------------
4685 // Immediate Operands
4686 // Integer Immediate
4687 operand immI()
4688 %{
4689 match(ConI);
4690
4691 op_cost(10);
4692 format %{ %}
4693 interface(CONST_INTER);
4694 %}
4695
4696 // Constant for test vs zero
4697 operand immI_0()
4698 %{
4699 predicate(n->get_int() == 0);
4700 match(ConI);
4701
4702 op_cost(0);
4703 format %{ %}
4704 interface(CONST_INTER);
4705 %}
4706
4707 // Constant for increment
4708 operand immI_1()
4709 %{
4710 predicate(n->get_int() == 1);
4711 match(ConI);
4712
4713 op_cost(0);
4714 format %{ %}
4715 interface(CONST_INTER);
4716 %}
4717
4718 // Constant for decrement
4719 operand immI_M1()
4720 %{
4721 predicate(n->get_int() == -1);
4722 match(ConI);
4723
4724 op_cost(0);
4725 format %{ %}
4726 interface(CONST_INTER);
4727 %}
4728
4729 operand immI_2()
4730 %{
4731 predicate(n->get_int() == 2);
4732 match(ConI);
4733
4734 op_cost(0);
4735 format %{ %}
4736 interface(CONST_INTER);
4737 %}
4738
4739 operand immI_4()
4740 %{
4741 predicate(n->get_int() == 4);
4742 match(ConI);
4743
4744 op_cost(0);
4745 format %{ %}
4746 interface(CONST_INTER);
4747 %}
4748
4749 operand immI_8()
4750 %{
4751 predicate(n->get_int() == 8);
4752 match(ConI);
4753
4754 op_cost(0);
4755 format %{ %}
4756 interface(CONST_INTER);
4757 %}
4758
4759 // Valid scale values for addressing modes
4760 operand immI2()
4761 %{
4762 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4763 match(ConI);
4764
4765 format %{ %}
4766 interface(CONST_INTER);
4767 %}
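// (A scale value k selects index * 2^k in the SIB byte: 0, 1, 2 and 3
// encode index*1, *2, *4 and *8 respectively.)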
4768
4769 operand immU7()
4770 %{
4771 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4772 match(ConI);
4773
4774 op_cost(5);
4775 format %{ %}
4776 interface(CONST_INTER);
4777 %}
4778
4779 operand immI8()
4780 %{
4781 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4782 match(ConI);
4783
4784 op_cost(5);
4785 format %{ %}
4786 interface(CONST_INTER);
4787 %}
4788
4789 operand immU8()
4790 %{
4791 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4792 match(ConI);
4793
4794 op_cost(5);
4795 format %{ %}
4796 interface(CONST_INTER);
4797 %}
4798
4799 operand immI16()
4800 %{
4801 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4802 match(ConI);
4803
4804 op_cost(10);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 // Int Immediate non-negative
4810 operand immU31()
4811 %{
4812 predicate(n->get_int() >= 0);
4813 match(ConI);
4814
4815 op_cost(0);
4816 format %{ %}
4817 interface(CONST_INTER);
4818 %}
4819
4820 // Pointer Immediate
4821 operand immP()
4822 %{
4823 match(ConP);
4824
4825 op_cost(10);
4826 format %{ %}
4827 interface(CONST_INTER);
4828 %}
4829
4830 // Null Pointer Immediate
4831 operand immP0()
4832 %{
4833 predicate(n->get_ptr() == 0);
4834 match(ConP);
4835
4836 op_cost(5);
4837 format %{ %}
4838 interface(CONST_INTER);
4839 %}
4840
4841 // Pointer Immediate
4842 operand immN() %{
4843 match(ConN);
4844
4845 op_cost(10);
4846 format %{ %}
4847 interface(CONST_INTER);
4848 %}
4849
4850 operand immNKlass() %{
4851 match(ConNKlass);
4852
4853 op_cost(10);
4854 format %{ %}
4855 interface(CONST_INTER);
4856 %}
4857
4858 // Null Pointer Immediate
4859 operand immN0() %{
4860 predicate(n->get_narrowcon() == 0);
4861 match(ConN);
4862
4863 op_cost(5);
4864 format %{ %}
4865 interface(CONST_INTER);
4866 %}
4867
4868 operand immP31()
4869 %{
4870 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4871 && (n->get_ptr() >> 31) == 0);
4872 match(ConP);
4873
4874 op_cost(5);
4875 format %{ %}
4876 interface(CONST_INTER);
4877 %}
4878
4879
4880 // Long Immediate
4881 operand immL()
4882 %{
4883 match(ConL);
4884
4885 op_cost(20);
4886 format %{ %}
4887 interface(CONST_INTER);
4888 %}
4889
4890 // Long Immediate 8-bit
4891 operand immL8()
4892 %{
4893 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4894 match(ConL);
4895
4896 op_cost(5);
4897 format %{ %}
4898 interface(CONST_INTER);
4899 %}
4900
4901 // Long Immediate 32-bit unsigned
4902 operand immUL32()
4903 %{
4904 predicate(n->get_long() == (unsigned int) (n->get_long()));
4905 match(ConL);
4906
4907 op_cost(10);
4908 format %{ %}
4909 interface(CONST_INTER);
4910 %}
4911
4912 // Long Immediate 32-bit signed
4913 operand immL32()
4914 %{
4915 predicate(n->get_long() == (int) (n->get_long()));
4916 match(ConL);
4917
4918 op_cost(15);
4919 format %{ %}
4920 interface(CONST_INTER);
4921 %}
4922
4923 operand immL_Pow2()
4924 %{
4925 predicate(is_power_of_2((julong)n->get_long()));
4926 match(ConL);
4927
4928 op_cost(15);
4929 format %{ %}
4930 interface(CONST_INTER);
4931 %}
4932
4933 operand immL_NotPow2()
4934 %{
4935 predicate(is_power_of_2((julong)~n->get_long()));
4936 match(ConL);
4937
4938 op_cost(15);
4939 format %{ %}
4940 interface(CONST_INTER);
4941 %}
4942
4943 // Long Immediate zero
4944 operand immL0()
4945 %{
4946 predicate(n->get_long() == 0L);
4947 match(ConL);
4948
4949 op_cost(10);
4950 format %{ %}
4951 interface(CONST_INTER);
4952 %}
4953
4954 // Constant for increment
4955 operand immL1()
4956 %{
4957 predicate(n->get_long() == 1);
4958 match(ConL);
4959
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 // Constant for decrement
4965 operand immL_M1()
4966 %{
4967 predicate(n->get_long() == -1);
4968 match(ConL);
4969
4970 format %{ %}
4971 interface(CONST_INTER);
4972 %}
4973
4974 // Long Immediate: low 32-bit mask
4975 operand immL_32bits()
4976 %{
4977 predicate(n->get_long() == 0xFFFFFFFFL);
4978 match(ConL);
4979 op_cost(20);
4980
4981 format %{ %}
4982 interface(CONST_INTER);
4983 %}
4984
4985 // Int Immediate: 2^n-1, positive
4986 operand immI_Pow2M1()
4987 %{
4988 predicate((n->get_int() > 0)
4989 && is_power_of_2((juint)n->get_int() + 1));
4990 match(ConI);
4991
4992 op_cost(20);
4993 format %{ %}
4994 interface(CONST_INTER);
4995 %}
4996
4997 // Float Immediate zero
4998 operand immF0()
4999 %{
5000 predicate(jint_cast(n->getf()) == 0);
5001 match(ConF);
5002
5003 op_cost(5);
5004 format %{ %}
5005 interface(CONST_INTER);
5006 %}
5007
5008 // Float Immediate
5009 operand immF()
5010 %{
5011 match(ConF);
5012
5013 op_cost(15);
5014 format %{ %}
5015 interface(CONST_INTER);
5016 %}
5017
5018 // Half Float Immediate
5019 operand immH()
5020 %{
5021 match(ConH);
5022
5023 op_cost(15);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Double Immediate zero
5029 operand immD0()
5030 %{
5031 predicate(jlong_cast(n->getd()) == 0);
5032 match(ConD);
5033
5034 op_cost(5);
5035 format %{ %}
5036 interface(CONST_INTER);
5037 %}
5038
5039 // Double Immediate
5040 operand immD()
5041 %{
5042 match(ConD);
5043
5044 op_cost(15);
5045 format %{ %}
5046 interface(CONST_INTER);
5047 %}
5048
5049 // Immediates for special shifts (sign extend)
5050
// Constants for sign-extension shift counts
5052 operand immI_16()
5053 %{
5054 predicate(n->get_int() == 16);
5055 match(ConI);
5056
5057 format %{ %}
5058 interface(CONST_INTER);
5059 %}
5060
5061 operand immI_24()
5062 %{
5063 predicate(n->get_int() == 24);
5064 match(ConI);
5065
5066 format %{ %}
5067 interface(CONST_INTER);
5068 %}
5069
5070 // Constant for byte-wide masking
5071 operand immI_255()
5072 %{
5073 predicate(n->get_int() == 255);
5074 match(ConI);
5075
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Constant for short-wide masking
5081 operand immI_65535()
5082 %{
5083 predicate(n->get_int() == 65535);
5084 match(ConI);
5085
5086 format %{ %}
5087 interface(CONST_INTER);
5088 %}
5089
5090 // Constant for byte-wide masking
5091 operand immL_255()
5092 %{
5093 predicate(n->get_long() == 255);
5094 match(ConL);
5095
5096 format %{ %}
5097 interface(CONST_INTER);
5098 %}
5099
5100 // Constant for short-wide masking
5101 operand immL_65535()
5102 %{
5103 predicate(n->get_long() == 65535);
5104 match(ConL);
5105
5106 format %{ %}
5107 interface(CONST_INTER);
5108 %}
5109
5110 operand kReg()
5111 %{
5112 constraint(ALLOC_IN_RC(vectmask_reg));
5113 match(RegVectMask);
format %{ %}
5115 interface(REG_INTER);
5116 %}
5117
5118 // Register Operands
5119 // Integer Register
5120 operand rRegI()
5121 %{
5122 constraint(ALLOC_IN_RC(int_reg));
5123 match(RegI);
5124
5125 match(rax_RegI);
5126 match(rbx_RegI);
5127 match(rcx_RegI);
5128 match(rdx_RegI);
5129 match(rdi_RegI);
5130
5131 format %{ %}
5132 interface(REG_INTER);
5133 %}
5134
5135 // Special Registers
5136 operand rax_RegI()
5137 %{
5138 constraint(ALLOC_IN_RC(int_rax_reg));
5139 match(RegI);
5140 match(rRegI);
5141
5142 format %{ "RAX" %}
5143 interface(REG_INTER);
5144 %}
5145
5146 // Special Registers
5147 operand rbx_RegI()
5148 %{
5149 constraint(ALLOC_IN_RC(int_rbx_reg));
5150 match(RegI);
5151 match(rRegI);
5152
5153 format %{ "RBX" %}
5154 interface(REG_INTER);
5155 %}
5156
5157 operand rcx_RegI()
5158 %{
5159 constraint(ALLOC_IN_RC(int_rcx_reg));
5160 match(RegI);
5161 match(rRegI);
5162
5163 format %{ "RCX" %}
5164 interface(REG_INTER);
5165 %}
5166
5167 operand rdx_RegI()
5168 %{
5169 constraint(ALLOC_IN_RC(int_rdx_reg));
5170 match(RegI);
5171 match(rRegI);
5172
5173 format %{ "RDX" %}
5174 interface(REG_INTER);
5175 %}
5176
5177 operand rdi_RegI()
5178 %{
5179 constraint(ALLOC_IN_RC(int_rdi_reg));
5180 match(RegI);
5181 match(rRegI);
5182
5183 format %{ "RDI" %}
5184 interface(REG_INTER);
5185 %}
5186
5187 operand no_rax_rdx_RegI()
5188 %{
5189 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5190 match(RegI);
5191 match(rbx_RegI);
5192 match(rcx_RegI);
5193 match(rdi_RegI);
5194
5195 format %{ %}
5196 interface(REG_INTER);
5197 %}
5198
5199 operand no_rbp_r13_RegI()
5200 %{
5201 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5202 match(RegI);
5203 match(rRegI);
5204 match(rax_RegI);
5205 match(rbx_RegI);
5206 match(rcx_RegI);
5207 match(rdx_RegI);
5208 match(rdi_RegI);
5209
5210 format %{ %}
5211 interface(REG_INTER);
5212 %}
5213
5214 // Pointer Register
5215 operand any_RegP()
5216 %{
5217 constraint(ALLOC_IN_RC(any_reg));
5218 match(RegP);
5219 match(rax_RegP);
5220 match(rbx_RegP);
5221 match(rdi_RegP);
5222 match(rsi_RegP);
5223 match(rbp_RegP);
5224 match(r15_RegP);
5225 match(rRegP);
5226
5227 format %{ %}
5228 interface(REG_INTER);
5229 %}
5230
5231 operand rRegP()
5232 %{
5233 constraint(ALLOC_IN_RC(ptr_reg));
5234 match(RegP);
5235 match(rax_RegP);
5236 match(rbx_RegP);
5237 match(rdi_RegP);
5238 match(rsi_RegP);
5239 match(rbp_RegP); // See Q&A below about
5240 match(r15_RegP); // r15_RegP and rbp_RegP.
5241
5242 format %{ %}
5243 interface(REG_INTER);
5244 %}
5245
5246 operand rRegN() %{
5247 constraint(ALLOC_IN_RC(int_reg));
5248 match(RegN);
5249
5250 format %{ %}
5251 interface(REG_INTER);
5252 %}
5253
5254 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5255 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
5257 // The output of an instruction is controlled by the allocator, which respects
5258 // register class masks, not match rules. Unless an instruction mentions
5259 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5260 // by the allocator as an input.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
5264
5265 // This operand is not allowed to use RBP even if
5266 // RBP is not used to hold the frame pointer.
5267 operand no_rbp_RegP()
5268 %{
5269 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5270 match(RegP);
5271 match(rbx_RegP);
5272 match(rsi_RegP);
5273 match(rdi_RegP);
5274
5275 format %{ %}
5276 interface(REG_INTER);
5277 %}
5278
5279 // Special Registers
5280 // Return a pointer value
5281 operand rax_RegP()
5282 %{
5283 constraint(ALLOC_IN_RC(ptr_rax_reg));
5284 match(RegP);
5285 match(rRegP);
5286
5287 format %{ %}
5288 interface(REG_INTER);
5289 %}
5290
5291 // Special Registers
5292 // Return a compressed pointer value
5293 operand rax_RegN()
5294 %{
5295 constraint(ALLOC_IN_RC(int_rax_reg));
5296 match(RegN);
5297 match(rRegN);
5298
5299 format %{ %}
5300 interface(REG_INTER);
5301 %}
5302
5303 // Used in AtomicAdd
5304 operand rbx_RegP()
5305 %{
5306 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5307 match(RegP);
5308 match(rRegP);
5309
5310 format %{ %}
5311 interface(REG_INTER);
5312 %}
5313
5314 operand rsi_RegP()
5315 %{
5316 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5317 match(RegP);
5318 match(rRegP);
5319
5320 format %{ %}
5321 interface(REG_INTER);
5322 %}
5323
5324 operand rbp_RegP()
5325 %{
5326 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5327 match(RegP);
5328 match(rRegP);
5329
5330 format %{ %}
5331 interface(REG_INTER);
5332 %}
5333
5334 // Used in rep stosq
5335 operand rdi_RegP()
5336 %{
5337 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5338 match(RegP);
5339 match(rRegP);
5340
5341 format %{ %}
5342 interface(REG_INTER);
5343 %}
5344
5345 operand r15_RegP()
5346 %{
5347 constraint(ALLOC_IN_RC(ptr_r15_reg));
5348 match(RegP);
5349 match(rRegP);
5350
5351 format %{ %}
5352 interface(REG_INTER);
5353 %}
5354
5355 operand rRegL()
5356 %{
5357 constraint(ALLOC_IN_RC(long_reg));
5358 match(RegL);
5359 match(rax_RegL);
5360 match(rdx_RegL);
5361
5362 format %{ %}
5363 interface(REG_INTER);
5364 %}
5365
5366 // Special Registers
5367 operand no_rax_rdx_RegL()
5368 %{
5369 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5370 match(RegL);
5371 match(rRegL);
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 operand rax_RegL()
5378 %{
5379 constraint(ALLOC_IN_RC(long_rax_reg));
5380 match(RegL);
5381 match(rRegL);
5382
5383 format %{ "RAX" %}
5384 interface(REG_INTER);
5385 %}
5386
5387 operand rcx_RegL()
5388 %{
5389 constraint(ALLOC_IN_RC(long_rcx_reg));
5390 match(RegL);
5391 match(rRegL);
5392
5393 format %{ %}
5394 interface(REG_INTER);
5395 %}
5396
5397 operand rdx_RegL()
5398 %{
5399 constraint(ALLOC_IN_RC(long_rdx_reg));
5400 match(RegL);
5401 match(rRegL);
5402
5403 format %{ %}
5404 interface(REG_INTER);
5405 %}
5406
5407 operand r11_RegL()
5408 %{
5409 constraint(ALLOC_IN_RC(long_r11_reg));
5410 match(RegL);
5411 match(rRegL);
5412
5413 format %{ %}
5414 interface(REG_INTER);
5415 %}
5416
5417 operand no_rbp_r13_RegL()
5418 %{
5419 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5420 match(RegL);
5421 match(rRegL);
5422 match(rax_RegL);
5423 match(rcx_RegL);
5424 match(rdx_RegL);
5425
5426 format %{ %}
5427 interface(REG_INTER);
5428 %}
5429
5430 // Flags register, used as output of compare instructions
5431 operand rFlagsReg()
5432 %{
5433 constraint(ALLOC_IN_RC(int_flags));
5434 match(RegFlags);
5435
5436 format %{ "RFLAGS" %}
5437 interface(REG_INTER);
5438 %}
5439
5440 // Flags register, used as output of FLOATING POINT compare instructions
5441 operand rFlagsRegU()
5442 %{
5443 constraint(ALLOC_IN_RC(int_flags));
5444 match(RegFlags);
5445
5446 format %{ "RFLAGS_U" %}
5447 interface(REG_INTER);
5448 %}
5449
5450 operand rFlagsRegUCF() %{
5451 constraint(ALLOC_IN_RC(int_flags));
5452 match(RegFlags);
5453 predicate(false);
5454
5455 format %{ "RFLAGS_U_CF" %}
5456 interface(REG_INTER);
5457 %}
5458
5459 // Float register operands
5460 operand regF() %{
5461 constraint(ALLOC_IN_RC(float_reg));
5462 match(RegF);
5463
5464 format %{ %}
5465 interface(REG_INTER);
5466 %}
5467
5468 // Float register operands
5469 operand legRegF() %{
5470 constraint(ALLOC_IN_RC(float_reg_legacy));
5471 match(RegF);
5472
5473 format %{ %}
5474 interface(REG_INTER);
5475 %}
5476
5477 // Float register operands
5478 operand vlRegF() %{
5479 constraint(ALLOC_IN_RC(float_reg_vl));
5480 match(RegF);
5481
5482 format %{ %}
5483 interface(REG_INTER);
5484 %}
5485
5486 // Double register operands
5487 operand regD() %{
5488 constraint(ALLOC_IN_RC(double_reg));
5489 match(RegD);
5490
5491 format %{ %}
5492 interface(REG_INTER);
5493 %}
5494
5495 // Double register operands
5496 operand legRegD() %{
5497 constraint(ALLOC_IN_RC(double_reg_legacy));
5498 match(RegD);
5499
5500 format %{ %}
5501 interface(REG_INTER);
5502 %}
5503
5504 // Double register operands
5505 operand vlRegD() %{
5506 constraint(ALLOC_IN_RC(double_reg_vl));
5507 match(RegD);
5508
5509 format %{ %}
5510 interface(REG_INTER);
5511 %}
5512
5513 //----------Memory Operands----------------------------------------------------
5514 // Direct Memory Operand
5515 // operand direct(immP addr)
5516 // %{
5517 // match(addr);
5518
5519 // format %{ "[$addr]" %}
5520 // interface(MEMORY_INTER) %{
5521 // base(0xFFFFFFFF);
5522 // index(0x4);
5523 // scale(0x0);
5524 // disp($addr);
5525 // %}
5526 // %}
5527
5528 // Indirect Memory Operand
5529 operand indirect(any_RegP reg)
5530 %{
5531 constraint(ALLOC_IN_RC(ptr_reg));
5532 match(reg);
5533
5534 format %{ "[$reg]" %}
5535 interface(MEMORY_INTER) %{
5536 base($reg);
5537 index(0x4);
5538 scale(0x0);
5539 disp(0x0);
5540 %}
5541 %}
5542
5543 // Indirect Memory Plus Short Offset Operand
5544 operand indOffset8(any_RegP reg, immL8 off)
5545 %{
5546 constraint(ALLOC_IN_RC(ptr_reg));
5547 match(AddP reg off);
5548
5549 format %{ "[$reg + $off (8-bit)]" %}
5550 interface(MEMORY_INTER) %{
5551 base($reg);
5552 index(0x4);
5553 scale(0x0);
5554 disp($off);
5555 %}
5556 %}
5557
5558 // Indirect Memory Plus Long Offset Operand
5559 operand indOffset32(any_RegP reg, immL32 off)
5560 %{
5561 constraint(ALLOC_IN_RC(ptr_reg));
5562 match(AddP reg off);
5563
5564 format %{ "[$reg + $off (32-bit)]" %}
5565 interface(MEMORY_INTER) %{
5566 base($reg);
5567 index(0x4);
5568 scale(0x0);
5569 disp($off);
5570 %}
5571 %}
5572
5573 // Indirect Memory Plus Index Register Plus Offset Operand
5574 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5575 %{
5576 constraint(ALLOC_IN_RC(ptr_reg));
5577 match(AddP (AddP reg lreg) off);
5578
5579 op_cost(10);
5580 format %{"[$reg + $off + $lreg]" %}
5581 interface(MEMORY_INTER) %{
5582 base($reg);
5583 index($lreg);
5584 scale(0x0);
5585 disp($off);
5586 %}
5587 %}
5588
5589 // Indirect Memory Plus Index Register Plus Offset Operand
5590 operand indIndex(any_RegP reg, rRegL lreg)
5591 %{
5592 constraint(ALLOC_IN_RC(ptr_reg));
5593 match(AddP reg lreg);
5594
5595 op_cost(10);
5596 format %{"[$reg + $lreg]" %}
5597 interface(MEMORY_INTER) %{
5598 base($reg);
5599 index($lreg);
5600 scale(0x0);
5601 disp(0x0);
5602 %}
5603 %}
5604
5605 // Indirect Memory Times Scale Plus Index Register
5606 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5607 %{
5608 constraint(ALLOC_IN_RC(ptr_reg));
5609 match(AddP reg (LShiftL lreg scale));
5610
5611 op_cost(10);
5612 format %{"[$reg + $lreg << $scale]" %}
5613 interface(MEMORY_INTER) %{
5614 base($reg);
5615 index($lreg);
5616 scale($scale);
5617 disp(0x0);
5618 %}
5619 %}
5620
5621 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5622 %{
5623 constraint(ALLOC_IN_RC(ptr_reg));
5624 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5625 match(AddP reg (LShiftL (ConvI2L idx) scale));
5626
5627 op_cost(10);
5628 format %{"[$reg + pos $idx << $scale]" %}
5629 interface(MEMORY_INTER) %{
5630 base($reg);
5631 index($idx);
5632 scale($scale);
5633 disp(0x0);
5634 %}
5635 %}
5636
5637 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5638 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5639 %{
5640 constraint(ALLOC_IN_RC(ptr_reg));
5641 match(AddP (AddP reg (LShiftL lreg scale)) off);
5642
5643 op_cost(10);
5644 format %{"[$reg + $off + $lreg << $scale]" %}
5645 interface(MEMORY_INTER) %{
5646 base($reg);
5647 index($lreg);
5648 scale($scale);
5649 disp($off);
5650 %}
5651 %}
5652
5653 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5654 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5655 %{
5656 constraint(ALLOC_IN_RC(ptr_reg));
5657 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5658 match(AddP (AddP reg (ConvI2L idx)) off);
5659
5660 op_cost(10);
5661 format %{"[$reg + $off + $idx]" %}
5662 interface(MEMORY_INTER) %{
5663 base($reg);
5664 index($idx);
5665 scale(0x0);
5666 disp($off);
5667 %}
5668 %}
5669
5670 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5671 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5672 %{
5673 constraint(ALLOC_IN_RC(ptr_reg));
5674 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5675 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5676
5677 op_cost(10);
5678 format %{"[$reg + $off + $idx << $scale]" %}
5679 interface(MEMORY_INTER) %{
5680 base($reg);
5681 index($idx);
5682 scale($scale);
5683 disp($off);
5684 %}
5685 %}
5686
5687 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even with CompressedOops::base() == nullptr.
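// The operand folds the narrow-oop decode into the addressing mode:
//   address = R12 (heap base) + (narrow_oop << 3) + off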
5690 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5691 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5692 constraint(ALLOC_IN_RC(ptr_reg));
5693 match(AddP (DecodeN reg) off);
5694
5695 op_cost(10);
5696 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5697 interface(MEMORY_INTER) %{
5698 base(0xc); // R12
5699 index($reg);
5700 scale(0x3);
5701 disp($off);
5702 %}
5703 %}
5704
5705 // Indirect Memory Operand
5706 operand indirectNarrow(rRegN reg)
5707 %{
5708 predicate(CompressedOops::shift() == 0);
5709 constraint(ALLOC_IN_RC(ptr_reg));
5710 match(DecodeN reg);
5711
5712 format %{ "[$reg]" %}
5713 interface(MEMORY_INTER) %{
5714 base($reg);
5715 index(0x4);
5716 scale(0x0);
5717 disp(0x0);
5718 %}
5719 %}
5720
5721 // Indirect Memory Plus Short Offset Operand
5722 operand indOffset8Narrow(rRegN reg, immL8 off)
5723 %{
5724 predicate(CompressedOops::shift() == 0);
5725 constraint(ALLOC_IN_RC(ptr_reg));
5726 match(AddP (DecodeN reg) off);
5727
5728 format %{ "[$reg + $off (8-bit)]" %}
5729 interface(MEMORY_INTER) %{
5730 base($reg);
5731 index(0x4);
5732 scale(0x0);
5733 disp($off);
5734 %}
5735 %}
5736
5737 // Indirect Memory Plus Long Offset Operand
5738 operand indOffset32Narrow(rRegN reg, immL32 off)
5739 %{
5740 predicate(CompressedOops::shift() == 0);
5741 constraint(ALLOC_IN_RC(ptr_reg));
5742 match(AddP (DecodeN reg) off);
5743
5744 format %{ "[$reg + $off (32-bit)]" %}
5745 interface(MEMORY_INTER) %{
5746 base($reg);
5747 index(0x4);
5748 scale(0x0);
5749 disp($off);
5750 %}
5751 %}
5752
5753 // Indirect Memory Plus Index Register Plus Offset Operand
5754 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5755 %{
5756 predicate(CompressedOops::shift() == 0);
5757 constraint(ALLOC_IN_RC(ptr_reg));
5758 match(AddP (AddP (DecodeN reg) lreg) off);
5759
5760 op_cost(10);
5761 format %{"[$reg + $off + $lreg]" %}
5762 interface(MEMORY_INTER) %{
5763 base($reg);
5764 index($lreg);
5765 scale(0x0);
5766 disp($off);
5767 %}
5768 %}
5769
5770 // Indirect Memory Plus Index Register Plus Offset Operand
5771 operand indIndexNarrow(rRegN reg, rRegL lreg)
5772 %{
5773 predicate(CompressedOops::shift() == 0);
5774 constraint(ALLOC_IN_RC(ptr_reg));
5775 match(AddP (DecodeN reg) lreg);
5776
5777 op_cost(10);
5778 format %{"[$reg + $lreg]" %}
5779 interface(MEMORY_INTER) %{
5780 base($reg);
5781 index($lreg);
5782 scale(0x0);
5783 disp(0x0);
5784 %}
5785 %}
5786
5787 // Indirect Memory Times Scale Plus Index Register
5788 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5789 %{
5790 predicate(CompressedOops::shift() == 0);
5791 constraint(ALLOC_IN_RC(ptr_reg));
5792 match(AddP (DecodeN reg) (LShiftL lreg scale));
5793
5794 op_cost(10);
5795 format %{"[$reg + $lreg << $scale]" %}
5796 interface(MEMORY_INTER) %{
5797 base($reg);
5798 index($lreg);
5799 scale($scale);
5800 disp(0x0);
5801 %}
5802 %}
5803
5804 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5805 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5806 %{
5807 predicate(CompressedOops::shift() == 0);
5808 constraint(ALLOC_IN_RC(ptr_reg));
5809 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5810
5811 op_cost(10);
5812 format %{"[$reg + $off + $lreg << $scale]" %}
5813 interface(MEMORY_INTER) %{
5814 base($reg);
5815 index($lreg);
5816 scale($scale);
5817 disp($off);
5818 %}
5819 %}
5820
5821 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
5822 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5823 %{
5824 constraint(ALLOC_IN_RC(ptr_reg));
5825 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5826 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5827
5828 op_cost(10);
5829 format %{"[$reg + $off + $idx]" %}
5830 interface(MEMORY_INTER) %{
5831 base($reg);
5832 index($idx);
5833 scale(0x0);
5834 disp($off);
5835 %}
5836 %}
5837
5838 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5839 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5840 %{
5841 constraint(ALLOC_IN_RC(ptr_reg));
5842 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5843 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5844
5845 op_cost(10);
5846 format %{"[$reg + $off + $idx << $scale]" %}
5847 interface(MEMORY_INTER) %{
5848 base($reg);
5849 index($idx);
5850 scale($scale);
5851 disp($off);
5852 %}
5853 %}
5854
5855 //----------Special Memory Operands--------------------------------------------
5856 // Stack Slot Operand - This operand is used for loading and storing temporary
5857 // values on the stack where a match requires a value to
5858 // flow through memory.
5859 operand stackSlotP(sRegP reg)
5860 %{
5861 constraint(ALLOC_IN_RC(stack_slots));
5862 // No match rule because this operand is only generated in matching
5863
5864 format %{ "[$reg]" %}
5865 interface(MEMORY_INTER) %{
5866 base(0x4); // RSP
5867 index(0x4); // No Index
5868 scale(0x0); // No Scale
5869 disp($reg); // Stack Offset
5870 %}
5871 %}
5872
5873 operand stackSlotI(sRegI reg)
5874 %{
5875 constraint(ALLOC_IN_RC(stack_slots));
5876 // No match rule because this operand is only generated in matching
5877
5878 format %{ "[$reg]" %}
5879 interface(MEMORY_INTER) %{
5880 base(0x4); // RSP
5881 index(0x4); // No Index
5882 scale(0x0); // No Scale
5883 disp($reg); // Stack Offset
5884 %}
5885 %}
5886
5887 operand stackSlotF(sRegF reg)
5888 %{
5889 constraint(ALLOC_IN_RC(stack_slots));
5890 // No match rule because this operand is only generated in matching
5891
5892 format %{ "[$reg]" %}
5893 interface(MEMORY_INTER) %{
5894 base(0x4); // RSP
5895 index(0x4); // No Index
5896 scale(0x0); // No Scale
5897 disp($reg); // Stack Offset
5898 %}
5899 %}
5900
5901 operand stackSlotD(sRegD reg)
5902 %{
5903 constraint(ALLOC_IN_RC(stack_slots));
5904 // No match rule because this operand is only generated in matching
5905
5906 format %{ "[$reg]" %}
5907 interface(MEMORY_INTER) %{
5908 base(0x4); // RSP
5909 index(0x4); // No Index
5910 scale(0x0); // No Scale
5911 disp($reg); // Stack Offset
5912 %}
5913 %}

operand stackSlotL(sRegL reg)
5915 %{
5916 constraint(ALLOC_IN_RC(stack_slots));
5917 // No match rule because this operand is only generated in matching
5918
5919 format %{ "[$reg]" %}
5920 interface(MEMORY_INTER) %{
5921 base(0x4); // RSP
5922 index(0x4); // No Index
5923 scale(0x0); // No Scale
5924 disp($reg); // Stack Offset
5925 %}
5926 %}
5927
5928 //----------Conditional Branch Operands----------------------------------------
5929 // Comparison Op - This is the operation of the comparison, and is limited to
5930 // the following set of codes:
5931 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5932 //
5933 // Other attributes of the comparison, such as unsignedness, are specified
5934 // by the comparison instruction that sets a condition code flags register.
5935 // That result is represented by a flags operand whose subtype is appropriate
5936 // to the unsignedness (etc.) of the comparison.
5937 //
5938 // Later, the instruction which matches both the Comparison Op (a Bool) and
5939 // the flags (produced by the Cmp) specifies the coding of the comparison op
5940 // by matching a specific subtype of Bool operand below, such as cmpOpU.
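//
// For instance, a minimal conditional-branch sketch (illustrative only,
// not the exact definition used in this file) consumes the cmpOp
// encoding via its $cmpcode accessor:
//
//   instruct jmpCon_example(cmpOp cop, rFlagsReg cr, label labl) %{
//     match(If cop cr);
//     effect(USE labl);
//     format %{ "j$cop    $labl" %}
//     ins_encode %{
//       Label* L = $labl$$label;
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *L);
//     %}
//     ins_pipe(pipe_jcc);
//   %}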
5941
5942 // Comparison Code
5943 operand cmpOp()
5944 %{
5945 match(Bool);
5946
5947 format %{ "" %}
5948 interface(COND_INTER) %{
5949 equal(0x4, "e");
5950 not_equal(0x5, "ne");
5951 less(0xC, "l");
5952 greater_equal(0xD, "ge");
5953 less_equal(0xE, "le");
5954 greater(0xF, "g");
5955 overflow(0x0, "o");
5956 no_overflow(0x1, "no");
5957 %}
5958 %}
5959
5960 // Comparison Code, unsigned compare. Used by FP also, with
5961 // C2 (unordered) turned into GT or LT already. The other bits
5962 // C0 and C3 are turned into Carry & Zero flags.
5963 operand cmpOpU()
5964 %{
5965 match(Bool);
5966
5967 format %{ "" %}
5968 interface(COND_INTER) %{
5969 equal(0x4, "e");
5970 not_equal(0x5, "ne");
5971 less(0x2, "b");
5972 greater_equal(0x3, "ae");
5973 less_equal(0x6, "be");
5974 greater(0x7, "a");
5975 overflow(0x0, "o");
5976 no_overflow(0x1, "no");
5977 %}
5978 %}
5979
5980
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
5984 operand cmpOpUCF() %{
5985 match(Bool);
5986 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5987 n->as_Bool()->_test._test == BoolTest::ge ||
5988 n->as_Bool()->_test._test == BoolTest::le ||
5989 n->as_Bool()->_test._test == BoolTest::gt ||
5990 n->in(1)->in(1) == n->in(1)->in(2));
5991 format %{ "" %}
5992 interface(COND_INTER) %{
5993 equal(0xb, "np");
5994 not_equal(0xa, "p");
5995 less(0x2, "b");
5996 greater_equal(0x3, "ae");
5997 less_equal(0x6, "be");
5998 greater(0x7, "a");
5999 overflow(0x0, "o");
6000 no_overflow(0x1, "no");
6001 %}
6002 %}
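// (After ucomiss/ucomisd an unordered result sets PF; when both inputs
// are the same value ZF is always set, so PF alone distinguishes "equal"
// from "unordered", which is why eq/ne map to "np"/"p" above.)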
6003
6004
6005 // Floating comparisons that can be fixed up with extra conditional jumps
6006 operand cmpOpUCF2() %{
6007 match(Bool);
6008 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6009 n->as_Bool()->_test._test == BoolTest::eq) &&
6010 n->in(1)->in(1) != n->in(1)->in(2));
6011 format %{ "" %}
6012 interface(COND_INTER) %{
6013 equal(0x4, "e");
6014 not_equal(0x5, "ne");
6015 less(0x2, "b");
6016 greater_equal(0x3, "ae");
6017 less_equal(0x6, "be");
6018 greater(0x7, "a");
6019 overflow(0x0, "o");
6020 no_overflow(0x1, "no");
6021 %}
6022 %}
6023
// Operands for bound floating-point register arguments
6025 operand rxmm0() %{
6026 constraint(ALLOC_IN_RC(xmm0_reg));
6027 match(VecX);
format %{ %}
6029 interface(REG_INTER);
6030 %}
6031
6032 // Vectors
6033
6034 // Dummy generic vector class. Should be used for all vector operands.
6035 // Replaced with vec[SDXYZ] during post-selection pass.
6036 operand vec() %{
6037 constraint(ALLOC_IN_RC(dynamic));
6038 match(VecX);
6039 match(VecY);
6040 match(VecZ);
6041 match(VecS);
6042 match(VecD);
6043
6044 format %{ %}
6045 interface(REG_INTER);
6046 %}
6047
6048 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6049 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6050 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6051 // runtime code generation via reg_class_dynamic.
6052 operand legVec() %{
6053 constraint(ALLOC_IN_RC(dynamic));
6054 match(VecX);
6055 match(VecY);
6056 match(VecZ);
6057 match(VecS);
6058 match(VecD);
6059
6060 format %{ %}
6061 interface(REG_INTER);
6062 %}
6063
6064 // Replaces vec during post-selection cleanup. See above.
6065 operand vecS() %{
6066 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6067 match(VecS);
6068
6069 format %{ %}
6070 interface(REG_INTER);
6071 %}
6072
6073 // Replaces legVec during post-selection cleanup. See above.
6074 operand legVecS() %{
6075 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6076 match(VecS);
6077
6078 format %{ %}
6079 interface(REG_INTER);
6080 %}
6081
6082 // Replaces vec during post-selection cleanup. See above.
6083 operand vecD() %{
6084 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6085 match(VecD);
6086
6087 format %{ %}
6088 interface(REG_INTER);
6089 %}
6090
6091 // Replaces legVec during post-selection cleanup. See above.
6092 operand legVecD() %{
6093 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6094 match(VecD);
6095
6096 format %{ %}
6097 interface(REG_INTER);
6098 %}
6099
6100 // Replaces vec during post-selection cleanup. See above.
6101 operand vecX() %{
6102 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6103 match(VecX);
6104
6105 format %{ %}
6106 interface(REG_INTER);
6107 %}
6108
6109 // Replaces legVec during post-selection cleanup. See above.
6110 operand legVecX() %{
6111 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6112 match(VecX);
6113
6114 format %{ %}
6115 interface(REG_INTER);
6116 %}
6117
6118 // Replaces vec during post-selection cleanup. See above.
6119 operand vecY() %{
6120 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6121 match(VecY);
6122
6123 format %{ %}
6124 interface(REG_INTER);
6125 %}
6126
6127 // Replaces legVec during post-selection cleanup. See above.
6128 operand legVecY() %{
6129 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6130 match(VecY);
6131
6132 format %{ %}
6133 interface(REG_INTER);
6134 %}
6135
6136 // Replaces vec during post-selection cleanup. See above.
6137 operand vecZ() %{
6138 constraint(ALLOC_IN_RC(vectorz_reg));
6139 match(VecZ);
6140
6141 format %{ %}
6142 interface(REG_INTER);
6143 %}
6144
6145 // Replaces legVec during post-selection cleanup. See above.
6146 operand legVecZ() %{
6147 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6148 match(VecZ);
6149
6150 format %{ %}
6151 interface(REG_INTER);
6152 %}
6153
6154 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the instruction
// accepts multiple operand types with the same basic encoding and format.
// The classic case of this is memory operands.
6160
6161 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6162 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6163 indCompressedOopOffset,
6164 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6165 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6166 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
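
// An instruction can then declare a single "memory" operand and match any
// of the addressing forms above. A minimal sketch (illustrative only, not
// an instruction defined at this point in the file):
//
//   instruct loadI_example(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}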
6167
6168 //----------PIPELINE-----------------------------------------------------------
6169 // Rules which define the behavior of the target architectures pipeline.
6170 pipeline %{
6171
6172 //----------ATTRIBUTES---------------------------------------------------------
6173 attributes %{
variable_size_instructions; // Instructions are of variable size
max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6177 instruction_fetch_unit_size = 16; // The processor fetches one line
6178 instruction_fetch_units = 1; // of 16 bytes
6179 %}
6180
6181 //----------RESOURCES----------------------------------------------------------
6182 // Resources are the functional units available to the machine
6183
6184 // Generic P2/P3 pipeline
6185 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6186 // 3 instructions decoded per cycle.
6187 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6188 // 3 ALU op, only ALU0 handles mul instructions.
6189 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6190 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6191 BR, FPU,
6192 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6193
6194 //----------PIPELINE DESCRIPTION-----------------------------------------------
6195 // Pipeline Description specifies the stages in the machine's pipeline
6196
6197 // Generic P2/P3 pipeline
6198 pipe_desc(S0, S1, S2, S3, S4, S5);
6199
6200 //----------PIPELINE CLASSES---------------------------------------------------
6201 // Pipeline Classes describe the stages in which input and output are
6202 // referenced by the hardware pipeline.
6203
6204 // Naming convention: ialu or fpu
6205 // Then: _reg
6206 // Then: _reg if there is a 2nd register
6207 // Then: _long if it's a pair of instructions implementing a long
6208 // Then: _fat if it requires the big decoder
6209 // Or: _mem if it requires the big decoder and a memory unit.
6210
6211 // Integer ALU reg operation
6212 pipe_class ialu_reg(rRegI dst)
6213 %{
6214 single_instruction;
6215 dst : S4(write);
6216 dst : S3(read);
6217 DECODE : S0; // any decoder
6218 ALU : S3; // any alu
6219 %}
6220
6221 // Long ALU reg operation
6222 pipe_class ialu_reg_long(rRegL dst)
6223 %{
6224 instruction_count(2);
6225 dst : S4(write);
6226 dst : S3(read);
6227 DECODE : S0(2); // any 2 decoders
6228 ALU : S3(2); // both alus
6229 %}
6230
6231 // Integer ALU reg operation using big decoder
6232 pipe_class ialu_reg_fat(rRegI dst)
6233 %{
6234 single_instruction;
6235 dst : S4(write);
6236 dst : S3(read);
6237 D0 : S0; // big decoder only
6238 ALU : S3; // any alu
6239 %}
6240
6241 // Integer ALU reg-reg operation
6242 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6243 %{
6244 single_instruction;
6245 dst : S4(write);
6246 src : S3(read);
6247 DECODE : S0; // any decoder
6248 ALU : S3; // any alu
6249 %}
6250
// Integer ALU reg-reg operation using big decoder
6252 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6253 %{
6254 single_instruction;
6255 dst : S4(write);
6256 src : S3(read);
6257 D0 : S0; // big decoder only
6258 ALU : S3; // any alu
6259 %}
6260
6261 // Integer ALU reg-mem operation
6262 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6263 %{
6264 single_instruction;
6265 dst : S5(write);
6266 mem : S3(read);
6267 D0 : S0; // big decoder only
6268 ALU : S4; // any alu
6269 MEM : S3; // any mem
6270 %}
6271
6272 // Integer mem operation (prefetch)
6273 pipe_class ialu_mem(memory mem)
6274 %{
6275 single_instruction;
6276 mem : S3(read);
6277 D0 : S0; // big decoder only
6278 MEM : S3; // any mem
6279 %}
6280
6281 // Integer Store to Memory
6282 pipe_class ialu_mem_reg(memory mem, rRegI src)
6283 %{
6284 single_instruction;
6285 mem : S3(read);
6286 src : S5(read);
6287 D0 : S0; // big decoder only
6288 ALU : S4; // any alu
6289 MEM : S3;
6290 %}
6291
6292 // // Long Store to Memory
6293 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6294 // %{
6295 // instruction_count(2);
6296 // mem : S3(read);
6297 // src : S5(read);
6298 // D0 : S0(2); // big decoder only; twice
6299 // ALU : S4(2); // any 2 alus
6300 // MEM : S3(2); // Both mems
6301 // %}
6302
// Integer Store of Immediate to Memory
6304 pipe_class ialu_mem_imm(memory mem)
6305 %{
6306 single_instruction;
6307 mem : S3(read);
6308 D0 : S0; // big decoder only
6309 ALU : S4; // any alu
6310 MEM : S3;
6311 %}
6312
6313 // Integer ALU0 reg-reg operation
6314 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6315 %{
6316 single_instruction;
6317 dst : S4(write);
6318 src : S3(read);
6319 D0 : S0; // Big decoder only
6320 ALU0 : S3; // only alu0
6321 %}
6322
6323 // Integer ALU0 reg-mem operation
6324 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6325 %{
6326 single_instruction;
6327 dst : S5(write);
6328 mem : S3(read);
6329 D0 : S0; // big decoder only
6330 ALU0 : S4; // ALU0 only
6331 MEM : S3; // any mem
6332 %}
6333
// Integer ALU flag-setting reg-reg operation
6335 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6336 %{
6337 single_instruction;
6338 cr : S4(write);
6339 src1 : S3(read);
6340 src2 : S3(read);
6341 DECODE : S0; // any decoder
6342 ALU : S3; // any alu
6343 %}
6344
// Integer ALU flag-setting reg-imm operation
6346 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6347 %{
6348 single_instruction;
6349 cr : S4(write);
6350 src1 : S3(read);
6351 DECODE : S0; // any decoder
6352 ALU : S3; // any alu
6353 %}
6354
// Integer ALU flag-setting reg-mem operation
6356 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6357 %{
6358 single_instruction;
6359 cr : S4(write);
6360 src1 : S3(read);
6361 src2 : S3(read);
6362 D0 : S0; // big decoder only
6363 ALU : S4; // any alu
6364 MEM : S3;
6365 %}
6366
6367 // Conditional move reg-reg
6368 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6369 %{
6370 instruction_count(4);
6371 y : S4(read);
6372 q : S3(read);
6373 p : S3(read);
DECODE : S0(4); // any 4 decoders
6375 %}
6376
6377 // Conditional move reg-reg
6378 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6379 %{
6380 single_instruction;
6381 dst : S4(write);
6382 src : S3(read);
6383 cr : S3(read);
6384 DECODE : S0; // any decoder
6385 %}
6386
6387 // Conditional move reg-mem
6388 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6389 %{
6390 single_instruction;
6391 dst : S4(write);
6392 src : S3(read);
6393 cr : S3(read);
6394 DECODE : S0; // any decoder
6395 MEM : S3;
6396 %}
6397
6398 // Conditional move reg-reg long
6399 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6400 %{
6401 single_instruction;
6402 dst : S4(write);
6403 src : S3(read);
6404 cr : S3(read);
6405 DECODE : S0(2); // any 2 decoders
6406 %}
6407
// Float reg operation
6409 pipe_class fpu_reg(regD dst)
6410 %{
6411 instruction_count(2);
6412 dst : S3(read);
6413 DECODE : S0(2); // any 2 decoders
6414 FPU : S3;
6415 %}
6416
6417 // Float reg-reg operation
6418 pipe_class fpu_reg_reg(regD dst, regD src)
6419 %{
6420 instruction_count(2);
6421 dst : S4(write);
6422 src : S3(read);
6423 DECODE : S0(2); // any 2 decoders
6424 FPU : S3;
6425 %}
6426
6427 // Float reg-reg operation
6428 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6429 %{
6430 instruction_count(3);
6431 dst : S4(write);
6432 src1 : S3(read);
6433 src2 : S3(read);
6434 DECODE : S0(3); // any 3 decoders
6435 FPU : S3(2);
6436 %}
6437
6438 // Float reg-reg operation
6439 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6440 %{
6441 instruction_count(4);
6442 dst : S4(write);
6443 src1 : S3(read);
6444 src2 : S3(read);
6445 src3 : S3(read);
DECODE : S0(4); // any 4 decoders
6447 FPU : S3(2);
6448 %}
6449
6450 // Float reg-reg operation
6451 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6452 %{
6453 instruction_count(4);
6454 dst : S4(write);
6455 src1 : S3(read);
6456 src2 : S3(read);
6457 src3 : S3(read);
6458 DECODE : S1(3); // any 3 decoders
6459 D0 : S0; // Big decoder only
6460 FPU : S3(2);
6461 MEM : S3;
6462 %}
6463
6464 // Float reg-mem operation
6465 pipe_class fpu_reg_mem(regD dst, memory mem)
6466 %{
6467 instruction_count(2);
6468 dst : S5(write);
6469 mem : S3(read);
6470 D0 : S0; // big decoder only
6471 DECODE : S1; // any decoder for FPU POP
6472 FPU : S4;
6473 MEM : S3; // any mem
6474 %}
6475
6476 // Float reg-mem operation
6477 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6478 %{
6479 instruction_count(3);
6480 dst : S5(write);
6481 src1 : S3(read);
6482 mem : S3(read);
6483 D0 : S0; // big decoder only
6484 DECODE : S1(2); // any decoder for FPU POP
6485 FPU : S4;
6486 MEM : S3; // any mem
6487 %}
6488
6489 // Float mem-reg operation
6490 pipe_class fpu_mem_reg(memory mem, regD src)
6491 %{
6492 instruction_count(2);
6493 src : S5(read);
6494 mem : S3(read);
6495 DECODE : S0; // any decoder for FPU PUSH
6496 D0 : S1; // big decoder only
6497 FPU : S4;
6498 MEM : S3; // any mem
6499 %}
6500
6501 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6502 %{
6503 instruction_count(3);
6504 src1 : S3(read);
6505 src2 : S3(read);
6506 mem : S3(read);
6507 DECODE : S0(2); // any decoder for FPU PUSH
6508 D0 : S1; // big decoder only
6509 FPU : S4;
6510 MEM : S3; // any mem
6511 %}
6512
6513 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6514 %{
6515 instruction_count(3);
6516 src1 : S3(read);
6517 src2 : S3(read);
6518 mem : S4(read);
6519 DECODE : S0; // any decoder for FPU PUSH
6520 D0 : S0(2); // big decoder only
6521 FPU : S4;
6522 MEM : S3(2); // any mem
6523 %}
6524
6525 pipe_class fpu_mem_mem(memory dst, memory src1)
6526 %{
6527 instruction_count(2);
6528 src1 : S3(read);
6529 dst : S4(read);
6530 D0 : S0(2); // big decoder only
6531 MEM : S3(2); // any mem
6532 %}
6533
6534 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6535 %{
6536 instruction_count(3);
6537 src1 : S3(read);
6538 src2 : S3(read);
6539 dst : S4(read);
6540 D0 : S0(3); // big decoder only
6541 FPU : S4;
6542 MEM : S3(3); // any mem
6543 %}
6544
6545 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6546 %{
6547 instruction_count(3);
6548 src1 : S4(read);
6549 mem : S4(read);
6550 DECODE : S0; // any decoder for FPU PUSH
6551 D0 : S0(2); // big decoder only
6552 FPU : S4;
6553 MEM : S3(2); // any mem
6554 %}
6555
6556 // Float load constant
6557 pipe_class fpu_reg_con(regD dst)
6558 %{
6559 instruction_count(2);
6560 dst : S5(write);
6561 D0 : S0; // big decoder only for the load
6562 DECODE : S1; // any decoder for FPU POP
6563 FPU : S4;
6564 MEM : S3; // any mem
6565 %}
6566
6567 // Float load constant
6568 pipe_class fpu_reg_reg_con(regD dst, regD src)
6569 %{
6570 instruction_count(3);
6571 dst : S5(write);
6572 src : S3(read);
6573 D0 : S0; // big decoder only for the load
6574 DECODE : S1(2); // any decoder for FPU POP
6575 FPU : S4;
6576 MEM : S3; // any mem
6577 %}
6578
// Unconditional branch
6580 pipe_class pipe_jmp(label labl)
6581 %{
6582 single_instruction;
6583 BR : S3;
6584 %}
6585
6586 // Conditional branch
6587 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6588 %{
6589 single_instruction;
6590 cr : S1(read);
6591 BR : S3;
6592 %}
6593
6594 // Allocation idiom
6595 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6596 %{
6597 instruction_count(1); force_serialization;
6598 fixed_latency(6);
6599 heap_ptr : S3(read);
6600 DECODE : S0(3);
6601 D0 : S2;
6602 MEM : S3;
6603 ALU : S3(2);
6604 dst : S5(write);
6605 BR : S5;
6606 %}
6607
6608 // Generic big/slow expanded idiom
6609 pipe_class pipe_slow()
6610 %{
6611 instruction_count(10); multiple_bundles; force_serialization;
6612 fixed_latency(100);
6613 D0 : S0(2);
6614 MEM : S3(2);
6615 %}
6616
6617 // The real do-nothing guy
6618 pipe_class empty()
6619 %{
6620 instruction_count(0);
6621 %}
6622
6623 // Define the class for the Nop node
6624 define
6625 %{
6626 MachNop = empty;
6627 %}
6628
6629 %}
6630
6631 //----------INSTRUCTIONS-------------------------------------------------------
6632 //
6633 // match -- States which machine-independent subtree may be replaced
6634 // by this instruction.
6635 // ins_cost -- The estimated cost of this instruction is used by instruction
6636 // selection to identify a minimum cost tree of machine
6637 // instructions that matches a tree of machine-independent
6638 // instructions.
6639 // format -- A string providing the disassembly for this instruction.
6640 // The value of an instruction's operand may be inserted
6641 // by referring to it with a '$' prefix.
// opcode -- Three instruction opcodes may be provided. These are referred
// to within an encode class as $primary, $secondary, and $tertiary,
// respectively. The primary opcode is commonly used to
// indicate the type of machine instruction, while secondary
// and tertiary are often used for prefix options or addressing
// modes.
6648 // ins_encode -- A list of encode classes with parameters. The encode class
6649 // name must have been defined in an 'enc_class' specification
6650 // in the encode section of the architecture description.
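//
// For illustration only (a hypothetical rule; the name, cost value, and pipe
// class here are assumptions, not definitions relied on by the matcher), the
// pieces above combine like this for a register-register integer add:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // replace the ideal AddI subtree
//     effect(KILL cr);                // addl clobbers the condition codes
//     ins_cost(150);                  // relative cost seen by the selector
//     format %{ "addl    $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}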
6651
6652 // ============================================================================
6653
6654 instruct ShouldNotReachHere() %{
6655 match(Halt);
6656 format %{ "stop\t# ShouldNotReachHere" %}
6657 ins_encode %{
6658 if (is_reachable()) {
6659 const char* str = __ code_string(_halt_reason);
6660 __ stop(str);
6661 }
6662 %}
6663 ins_pipe(pipe_slow);
6664 %}
6665
6666 // ============================================================================
6667
6668 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6669 // Load Float
6670 instruct MoveF2VL(vlRegF dst, regF src) %{
6671 match(Set dst src);
6672 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6673 ins_encode %{
6674 ShouldNotReachHere();
6675 %}
6676 ins_pipe( fpu_reg_reg );
6677 %}
6678
6679 // Load Float
6680 instruct MoveF2LEG(legRegF dst, regF src) %{
6681 match(Set dst src);
6682 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6683 ins_encode %{
6684 ShouldNotReachHere();
6685 %}
6686 ins_pipe( fpu_reg_reg );
6687 %}
6688
6689 // Load Float
6690 instruct MoveVL2F(regF dst, vlRegF src) %{
6691 match(Set dst src);
6692 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6693 ins_encode %{
6694 ShouldNotReachHere();
6695 %}
6696 ins_pipe( fpu_reg_reg );
6697 %}
6698
6699 // Load Float
6700 instruct MoveLEG2F(regF dst, legRegF src) %{
6701 match(Set dst src);
6702 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6703 ins_encode %{
6704 ShouldNotReachHere();
6705 %}
6706 ins_pipe( fpu_reg_reg );
6707 %}
6708
6709 // Load Double
6710 instruct MoveD2VL(vlRegD dst, regD src) %{
6711 match(Set dst src);
6712 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6713 ins_encode %{
6714 ShouldNotReachHere();
6715 %}
6716 ins_pipe( fpu_reg_reg );
6717 %}
6718
6719 // Load Double
6720 instruct MoveD2LEG(legRegD dst, regD src) %{
6721 match(Set dst src);
6722 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6723 ins_encode %{
6724 ShouldNotReachHere();
6725 %}
6726 ins_pipe( fpu_reg_reg );
6727 %}
6728
6729 // Load Double
6730 instruct MoveVL2D(regD dst, vlRegD src) %{
6731 match(Set dst src);
6732 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6733 ins_encode %{
6734 ShouldNotReachHere();
6735 %}
6736 ins_pipe( fpu_reg_reg );
6737 %}
6738
6739 // Load Double
6740 instruct MoveLEG2D(regD dst, legRegD src) %{
6741 match(Set dst src);
6742 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6743 ins_encode %{
6744 ShouldNotReachHere();
6745 %}
6746 ins_pipe( fpu_reg_reg );
6747 %}
6748
6749 //----------Load/Store/Move Instructions---------------------------------------
6750 //----------Load Instructions--------------------------------------------------
6751
6752 // Load Byte (8 bit signed)
6753 instruct loadB(rRegI dst, memory mem)
6754 %{
6755 match(Set dst (LoadB mem));
6756
6757 ins_cost(125);
6758 format %{ "movsbl $dst, $mem\t# byte" %}
6759
6760 ins_encode %{
6761 __ movsbl($dst$$Register, $mem$$Address);
6762 %}
6763
6764 ins_pipe(ialu_reg_mem);
6765 %}
6766
6767 // Load Byte (8 bit signed) into Long Register
6768 instruct loadB2L(rRegL dst, memory mem)
6769 %{
6770 match(Set dst (ConvI2L (LoadB mem)));
6771
6772 ins_cost(125);
6773 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6774
6775 ins_encode %{
6776 __ movsbq($dst$$Register, $mem$$Address);
6777 %}
6778
6779 ins_pipe(ialu_reg_mem);
6780 %}
6781
6782 // Load Unsigned Byte (8 bit UNsigned)
6783 instruct loadUB(rRegI dst, memory mem)
6784 %{
6785 match(Set dst (LoadUB mem));
6786
6787 ins_cost(125);
6788 format %{ "movzbl $dst, $mem\t# ubyte" %}
6789
6790 ins_encode %{
6791 __ movzbl($dst$$Register, $mem$$Address);
6792 %}
6793
6794 ins_pipe(ialu_reg_mem);
6795 %}
6796
6797 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6798 instruct loadUB2L(rRegL dst, memory mem)
6799 %{
6800 match(Set dst (ConvI2L (LoadUB mem)));
6801
6802 ins_cost(125);
6803 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6804
6805 ins_encode %{
6806 __ movzbq($dst$$Register, $mem$$Address);
6807 %}
6808
6809 ins_pipe(ialu_reg_mem);
6810 %}
6811
6812 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
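// Note that the mask is truncated to its low 8 bits before the andl: after the
// movzbq the upper bits of $dst are already zero, so only the byte-sized part
// of the mask can matter. The ushort/char rule further down applies the same
// trick with 16 bits.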
6813 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6814 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6815 effect(KILL cr);
6816
6817 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6818 "andl $dst, right_n_bits($mask, 8)" %}
6819 ins_encode %{
6820 Register Rdst = $dst$$Register;
6821 __ movzbq(Rdst, $mem$$Address);
6822 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6823 %}
6824 ins_pipe(ialu_reg_mem);
6825 %}
6826
6827 // Load Short (16 bit signed)
6828 instruct loadS(rRegI dst, memory mem)
6829 %{
6830 match(Set dst (LoadS mem));
6831
6832 ins_cost(125);
6833 format %{ "movswl $dst, $mem\t# short" %}
6834
6835 ins_encode %{
6836 __ movswl($dst$$Register, $mem$$Address);
6837 %}
6838
6839 ins_pipe(ialu_reg_mem);
6840 %}
6841
6842 // Load Short (16 bit signed) to Byte (8 bit signed)
6843 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6844 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6845
6846 ins_cost(125);
6847 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6848 ins_encode %{
6849 __ movsbl($dst$$Register, $mem$$Address);
6850 %}
6851 ins_pipe(ialu_reg_mem);
6852 %}
6853
6854 // Load Short (16 bit signed) into Long Register
6855 instruct loadS2L(rRegL dst, memory mem)
6856 %{
6857 match(Set dst (ConvI2L (LoadS mem)));
6858
6859 ins_cost(125);
6860 format %{ "movswq $dst, $mem\t# short -> long" %}
6861
6862 ins_encode %{
6863 __ movswq($dst$$Register, $mem$$Address);
6864 %}
6865
6866 ins_pipe(ialu_reg_mem);
6867 %}
6868
6869 // Load Unsigned Short/Char (16 bit UNsigned)
6870 instruct loadUS(rRegI dst, memory mem)
6871 %{
6872 match(Set dst (LoadUS mem));
6873
6874 ins_cost(125);
6875 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6876
6877 ins_encode %{
6878 __ movzwl($dst$$Register, $mem$$Address);
6879 %}
6880
6881 ins_pipe(ialu_reg_mem);
6882 %}
6883
6884 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6885 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6886 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6887
6888 ins_cost(125);
6889 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6890 ins_encode %{
6891 __ movsbl($dst$$Register, $mem$$Address);
6892 %}
6893 ins_pipe(ialu_reg_mem);
6894 %}
6895
6896 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6897 instruct loadUS2L(rRegL dst, memory mem)
6898 %{
6899 match(Set dst (ConvI2L (LoadUS mem)));
6900
6901 ins_cost(125);
6902 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6903
6904 ins_encode %{
6905 __ movzwq($dst$$Register, $mem$$Address);
6906 %}
6907
6908 ins_pipe(ialu_reg_mem);
6909 %}
6910
6911 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6912 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6913 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6914
6915 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6916 ins_encode %{
6917 __ movzbq($dst$$Register, $mem$$Address);
6918 %}
6919 ins_pipe(ialu_reg_mem);
6920 %}
6921
6922 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
6923 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6924 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6925 effect(KILL cr);
6926
6927 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
6928 "andl $dst, right_n_bits($mask, 16)" %}
6929 ins_encode %{
6930 Register Rdst = $dst$$Register;
6931 __ movzwq(Rdst, $mem$$Address);
6932 __ andl(Rdst, $mask$$constant & right_n_bits(16));
6933 %}
6934 ins_pipe(ialu_reg_mem);
6935 %}
6936
6937 // Load Integer
6938 instruct loadI(rRegI dst, memory mem)
6939 %{
6940 match(Set dst (LoadI mem));
6941
6942 ins_cost(125);
6943 format %{ "movl $dst, $mem\t# int" %}
6944
6945 ins_encode %{
6946 __ movl($dst$$Register, $mem$$Address);
6947 %}
6948
6949 ins_pipe(ialu_reg_mem);
6950 %}
6951
6952 // Load Integer (32 bit signed) to Byte (8 bit signed)
6953 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6954 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6955
6956 ins_cost(125);
6957 format %{ "movsbl $dst, $mem\t# int -> byte" %}
6958 ins_encode %{
6959 __ movsbl($dst$$Register, $mem$$Address);
6960 %}
6961 ins_pipe(ialu_reg_mem);
6962 %}
6963
6964 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6965 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6966 match(Set dst (AndI (LoadI mem) mask));
6967
6968 ins_cost(125);
6969 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
6970 ins_encode %{
6971 __ movzbl($dst$$Register, $mem$$Address);
6972 %}
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Integer (32 bit signed) to Short (16 bit signed)
6977 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6978 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6979
6980 ins_cost(125);
6981 format %{ "movswl $dst, $mem\t# int -> short" %}
6982 ins_encode %{
6983 __ movswl($dst$$Register, $mem$$Address);
6984 %}
6985 ins_pipe(ialu_reg_mem);
6986 %}
6987
6988 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6989 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6990 match(Set dst (AndI (LoadI mem) mask));
6991
6992 ins_cost(125);
6993 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
6994 ins_encode %{
6995 __ movzwl($dst$$Register, $mem$$Address);
6996 %}
6997 ins_pipe(ialu_reg_mem);
6998 %}
6999
7000 // Load Integer into Long Register
7001 instruct loadI2L(rRegL dst, memory mem)
7002 %{
7003 match(Set dst (ConvI2L (LoadI mem)));
7004
7005 ins_cost(125);
7006 format %{ "movslq $dst, $mem\t# int -> long" %}
7007
7008 ins_encode %{
7009 __ movslq($dst$$Register, $mem$$Address);
7010 %}
7011
7012 ins_pipe(ialu_reg_mem);
7013 %}
7014
7015 // Load Integer with mask 0xFF into Long Register
7016 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7017 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7018
7019 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7020 ins_encode %{
7021 __ movzbq($dst$$Register, $mem$$Address);
7022 %}
7023 ins_pipe(ialu_reg_mem);
7024 %}
7025
7026 // Load Integer with mask 0xFFFF into Long Register
7027 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7028 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7029
7030 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7031 ins_encode %{
7032 __ movzwq($dst$$Register, $mem$$Address);
7033 %}
7034 ins_pipe(ialu_reg_mem);
7035 %}
7036
7037 // Load Integer with a 31-bit mask into Long Register
7038 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7039 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7040 effect(KILL cr);
7041
7042 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7043 "andl $dst, $mask" %}
7044 ins_encode %{
7045 Register Rdst = $dst$$Register;
7046 __ movl(Rdst, $mem$$Address);
7047 __ andl(Rdst, $mask$$constant);
7048 %}
7049 ins_pipe(ialu_reg_mem);
7050 %}
7051
7052 // Load Unsigned Integer into Long Register
7053 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7054 %{
7055 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7056
7057 ins_cost(125);
7058 format %{ "movl $dst, $mem\t# uint -> long" %}
7059
7060 ins_encode %{
7061 __ movl($dst$$Register, $mem$$Address);
7062 %}
7063
7064 ins_pipe(ialu_reg_mem);
7065 %}
7066
7067 // Load Long
7068 instruct loadL(rRegL dst, memory mem)
7069 %{
7070 match(Set dst (LoadL mem));
7071
7072 ins_cost(125);
7073 format %{ "movq $dst, $mem\t# long" %}
7074
7075 ins_encode %{
7076 __ movq($dst$$Register, $mem$$Address);
7077 %}
7078
7079 ins_pipe(ialu_reg_mem); // XXX
7080 %}
7081
7082 // Load Range
7083 instruct loadRange(rRegI dst, memory mem)
7084 %{
7085 match(Set dst (LoadRange mem));
7086
7087 ins_cost(125); // XXX
7088 format %{ "movl $dst, $mem\t# range" %}
7089 ins_encode %{
7090 __ movl($dst$$Register, $mem$$Address);
7091 %}
7092 ins_pipe(ialu_reg_mem);
7093 %}
7094
7095 // Load Pointer
7096 instruct loadP(rRegP dst, memory mem)
7097 %{
7098 match(Set dst (LoadP mem));
7099 predicate(n->as_Load()->barrier_data() == 0);
7100
7101 ins_cost(125); // XXX
7102 format %{ "movq $dst, $mem\t# ptr" %}
7103 ins_encode %{
7104 __ movq($dst$$Register, $mem$$Address);
7105 %}
7106 ins_pipe(ialu_reg_mem); // XXX
7107 %}
7108
7109 // Load Compressed Pointer
7110 instruct loadN(rRegN dst, memory mem)
7111 %{
7112 predicate(n->as_Load()->barrier_data() == 0);
7113 match(Set dst (LoadN mem));
7114
7115 ins_cost(125); // XXX
7116 format %{ "movl $dst, $mem\t# compressed ptr" %}
7117 ins_encode %{
7118 __ movl($dst$$Register, $mem$$Address);
7119 %}
7120 ins_pipe(ialu_reg_mem); // XXX
7121 %}
7122
7123
7124 // Load Klass Pointer
7125 instruct loadKlass(rRegP dst, memory mem)
7126 %{
7127 match(Set dst (LoadKlass mem));
7128
7129 ins_cost(125); // XXX
7130 format %{ "movq $dst, $mem\t# class" %}
7131 ins_encode %{
7132 __ movq($dst$$Register, $mem$$Address);
7133 %}
7134 ins_pipe(ialu_reg_mem); // XXX
7135 %}
7136
7137 // Load narrow Klass Pointer
7138 instruct loadNKlass(rRegN dst, memory mem)
7139 %{
7140 predicate(!UseCompactObjectHeaders);
7141 match(Set dst (LoadNKlass mem));
7142
7143 ins_cost(125); // XXX
7144 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7145 ins_encode %{
7146 __ movl($dst$$Register, $mem$$Address);
7147 %}
7148 ins_pipe(ialu_reg_mem); // XXX
7149 %}
7150
7151 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7152 %{
7153 predicate(UseCompactObjectHeaders);
7154 match(Set dst (LoadNKlass mem));
7155 effect(KILL cr);
7156 ins_cost(125);
7157 format %{
7158 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7159 "shrl $dst, markWord::klass_shift_at_offset"
7160 %}
7161 ins_encode %{
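    // With APX, the load and the shift fuse into a single new-data-destination
    // shift (eshrl) with a memory source, saving the separate movl.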
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7169 %}
7170 ins_pipe(ialu_reg_mem);
7171 %}
7172
7173 // Load Float
7174 instruct loadF(regF dst, memory mem)
7175 %{
7176 match(Set dst (LoadF mem));
7177
7178 ins_cost(145); // XXX
7179 format %{ "movss $dst, $mem\t# float" %}
7180 ins_encode %{
7181 __ movflt($dst$$XMMRegister, $mem$$Address);
7182 %}
7183 ins_pipe(pipe_slow); // XXX
7184 %}
7185
7186 // Load Double
7187 instruct loadD_partial(regD dst, memory mem)
7188 %{
7189 predicate(!UseXmmLoadAndClearUpper);
7190 match(Set dst (LoadD mem));
7191
7192 ins_cost(145); // XXX
7193 format %{ "movlpd $dst, $mem\t# double" %}
7194 ins_encode %{
7195 __ movdbl($dst$$XMMRegister, $mem$$Address);
7196 %}
7197 ins_pipe(pipe_slow); // XXX
7198 %}
7199
7200 instruct loadD(regD dst, memory mem)
7201 %{
7202 predicate(UseXmmLoadAndClearUpper);
7203 match(Set dst (LoadD mem));
7204
7205 ins_cost(145); // XXX
7206 format %{ "movsd $dst, $mem\t# double" %}
7207 ins_encode %{
7208 __ movdbl($dst$$XMMRegister, $mem$$Address);
7209 %}
7210 ins_pipe(pipe_slow); // XXX
7211 %}
7212
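// Scalar float/double min/max. Java's Math.max/min semantics differ from the
// plain SSE maxss/minss behavior: NaN must propagate and -0.0 compares below
// +0.0. The pre-AVX10.2 rules below therefore expand into a blend-based
// sequence (vminmax_fp) or, inside reductions, a scalar helper
// (emit_fp_min_max), while AVX10.2 provides min/max instructions with the
// required semantics directly, so those rules need no temporaries.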
7213 // max = java.lang.Math.max(float a, float b)
7214 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7215 predicate(VM_Version::supports_avx10_2());
7216 match(Set dst (MaxF a b));
7217 format %{ "maxF $dst, $a, $b" %}
7218 ins_encode %{
7219 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7220 %}
7221 ins_pipe( pipe_slow );
7222 %}
7223
7224 // max = java.lang.Math.max(float a, float b)
7225 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7226 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7227 match(Set dst (MaxF a b));
7228 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7229 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7230 ins_encode %{
7231 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7232 %}
7233 ins_pipe( pipe_slow );
7234 %}
7235
7236 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7237 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7238 match(Set dst (MaxF a b));
7239 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7240
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7242 ins_encode %{
7243 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7244 false /*min*/, true /*single*/);
7245 %}
7246 ins_pipe( pipe_slow );
7247 %}
7248
7249 // max = java.lang.Math.max(double a, double b)
7250 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7251 predicate(VM_Version::supports_avx10_2());
7252 match(Set dst (MaxD a b));
7253 format %{ "maxD $dst, $a, $b" %}
7254 ins_encode %{
7255 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7256 %}
7257 ins_pipe( pipe_slow );
7258 %}
7259
7260 // max = java.lang.Math.max(double a, double b)
7261 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7262 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7263 match(Set dst (MaxD a b));
7264 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7265 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7266 ins_encode %{
7267 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7268 %}
7269 ins_pipe( pipe_slow );
7270 %}
7271
7272 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7273 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7274 match(Set dst (MaxD a b));
7275 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7276
7277 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7278 ins_encode %{
7279 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7280 false /*min*/, false /*single*/);
7281 %}
7282 ins_pipe( pipe_slow );
7283 %}
7284
// min = java.lang.Math.min(float a, float b)
7286 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7287 predicate(VM_Version::supports_avx10_2());
7288 match(Set dst (MinF a b));
7289 format %{ "minF $dst, $a, $b" %}
7290 ins_encode %{
7291 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7292 %}
7293 ins_pipe( pipe_slow );
7294 %}
7295
7296 // min = java.lang.Math.min(float a, float b)
7297 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7298 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7299 match(Set dst (MinF a b));
7300 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7301 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7302 ins_encode %{
7303 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7304 %}
7305 ins_pipe( pipe_slow );
7306 %}
7307
7308 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7309 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7310 match(Set dst (MinF a b));
7311 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7312
7313 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7314 ins_encode %{
7315 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7316 true /*min*/, true /*single*/);
7317 %}
7318 ins_pipe( pipe_slow );
7319 %}
7320
// min = java.lang.Math.min(double a, double b)
7322 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7323 predicate(VM_Version::supports_avx10_2());
7324 match(Set dst (MinD a b));
7325 format %{ "minD $dst, $a, $b" %}
7326 ins_encode %{
7327 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7328 %}
7329 ins_pipe( pipe_slow );
7330 %}
7331
7332 // min = java.lang.Math.min(double a, double b)
7333 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7334 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7335 match(Set dst (MinD a b));
7336 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7337 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7338 ins_encode %{
7339 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7340 %}
7341 ins_pipe( pipe_slow );
7342 %}
7343
7344 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7345 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7346 match(Set dst (MinD a b));
7347 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7348
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7350 ins_encode %{
7351 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7352 true /*min*/, false /*single*/);
7353 %}
7354 ins_pipe( pipe_slow );
7355 %}
7356
7357 // Load Effective Address
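// The variants below differ only in the addressing mode they match; each one
// folds the entire address computation into a single leaq.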
7358 instruct leaP8(rRegP dst, indOffset8 mem)
7359 %{
7360 match(Set dst mem);
7361
7362 ins_cost(110); // XXX
7363 format %{ "leaq $dst, $mem\t# ptr 8" %}
7364 ins_encode %{
7365 __ leaq($dst$$Register, $mem$$Address);
7366 %}
7367 ins_pipe(ialu_reg_reg_fat);
7368 %}
7369
7370 instruct leaP32(rRegP dst, indOffset32 mem)
7371 %{
7372 match(Set dst mem);
7373
7374 ins_cost(110);
7375 format %{ "leaq $dst, $mem\t# ptr 32" %}
7376 ins_encode %{
7377 __ leaq($dst$$Register, $mem$$Address);
7378 %}
7379 ins_pipe(ialu_reg_reg_fat);
7380 %}
7381
7382 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7383 %{
7384 match(Set dst mem);
7385
7386 ins_cost(110);
7387 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7388 ins_encode %{
7389 __ leaq($dst$$Register, $mem$$Address);
7390 %}
7391 ins_pipe(ialu_reg_reg_fat);
7392 %}
7393
7394 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7395 %{
7396 match(Set dst mem);
7397
7398 ins_cost(110);
7399 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7400 ins_encode %{
7401 __ leaq($dst$$Register, $mem$$Address);
7402 %}
7403 ins_pipe(ialu_reg_reg_fat);
7404 %}
7405
7406 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7407 %{
7408 match(Set dst mem);
7409
7410 ins_cost(110);
7411 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7412 ins_encode %{
7413 __ leaq($dst$$Register, $mem$$Address);
7414 %}
7415 ins_pipe(ialu_reg_reg_fat);
7416 %}
7417
7418 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7419 %{
7420 match(Set dst mem);
7421
7422 ins_cost(110);
7423 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7424 ins_encode %{
7425 __ leaq($dst$$Register, $mem$$Address);
7426 %}
7427 ins_pipe(ialu_reg_reg_fat);
7428 %}
7429
7430 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7431 %{
7432 match(Set dst mem);
7433
7434 ins_cost(110);
7435 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7436 ins_encode %{
7437 __ leaq($dst$$Register, $mem$$Address);
7438 %}
7439 ins_pipe(ialu_reg_reg_fat);
7440 %}
7441
7442 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7443 %{
7444 match(Set dst mem);
7445
7446 ins_cost(110);
7447 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7448 ins_encode %{
7449 __ leaq($dst$$Register, $mem$$Address);
7450 %}
7451 ins_pipe(ialu_reg_reg_fat);
7452 %}
7453
// Load Effective Address which uses a narrow (32-bit) oop
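// With a nonzero shift the decoded address is base + (narrow_oop << shift),
// which the operand below expresses directly. The *Narrow rules that follow
// instead require CompressedOops::shift() == 0, so the raw 32-bit narrow value
// can serve as a plain base in the address.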
7455 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7456 %{
7457 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7458 match(Set dst mem);
7459
7460 ins_cost(110);
7461 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7462 ins_encode %{
7463 __ leaq($dst$$Register, $mem$$Address);
7464 %}
7465 ins_pipe(ialu_reg_reg_fat);
7466 %}
7467
7468 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7469 %{
7470 predicate(CompressedOops::shift() == 0);
7471 match(Set dst mem);
7472
7473 ins_cost(110); // XXX
7474 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7475 ins_encode %{
7476 __ leaq($dst$$Register, $mem$$Address);
7477 %}
7478 ins_pipe(ialu_reg_reg_fat);
7479 %}
7480
7481 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7482 %{
7483 predicate(CompressedOops::shift() == 0);
7484 match(Set dst mem);
7485
7486 ins_cost(110);
7487 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7488 ins_encode %{
7489 __ leaq($dst$$Register, $mem$$Address);
7490 %}
7491 ins_pipe(ialu_reg_reg_fat);
7492 %}
7493
7494 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7495 %{
7496 predicate(CompressedOops::shift() == 0);
7497 match(Set dst mem);
7498
7499 ins_cost(110);
7500 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7501 ins_encode %{
7502 __ leaq($dst$$Register, $mem$$Address);
7503 %}
7504 ins_pipe(ialu_reg_reg_fat);
7505 %}
7506
7507 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7508 %{
7509 predicate(CompressedOops::shift() == 0);
7510 match(Set dst mem);
7511
7512 ins_cost(110);
7513 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7514 ins_encode %{
7515 __ leaq($dst$$Register, $mem$$Address);
7516 %}
7517 ins_pipe(ialu_reg_reg_fat);
7518 %}
7519
7520 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7521 %{
7522 predicate(CompressedOops::shift() == 0);
7523 match(Set dst mem);
7524
7525 ins_cost(110);
7526 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7527 ins_encode %{
7528 __ leaq($dst$$Register, $mem$$Address);
7529 %}
7530 ins_pipe(ialu_reg_reg_fat);
7531 %}
7532
7533 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7534 %{
7535 predicate(CompressedOops::shift() == 0);
7536 match(Set dst mem);
7537
7538 ins_cost(110);
7539 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7540 ins_encode %{
7541 __ leaq($dst$$Register, $mem$$Address);
7542 %}
7543 ins_pipe(ialu_reg_reg_fat);
7544 %}
7545
7546 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7547 %{
7548 predicate(CompressedOops::shift() == 0);
7549 match(Set dst mem);
7550
7551 ins_cost(110);
7552 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7553 ins_encode %{
7554 __ leaq($dst$$Register, $mem$$Address);
7555 %}
7556 ins_pipe(ialu_reg_reg_fat);
7557 %}
7558
7559 instruct loadConI(rRegI dst, immI src)
7560 %{
7561 match(Set dst src);
7562
7563 format %{ "movl $dst, $src\t# int" %}
7564 ins_encode %{
7565 __ movl($dst$$Register, $src$$constant);
7566 %}
7567 ins_pipe(ialu_reg_fat); // XXX
7568 %}
7569
7570 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7571 %{
7572 match(Set dst src);
7573 effect(KILL cr);
7574
7575 ins_cost(50);
7576 format %{ "xorl $dst, $dst\t# int" %}
7577 ins_encode %{
7578 __ xorl($dst$$Register, $dst$$Register);
7579 %}
7580 ins_pipe(ialu_reg);
7581 %}
7582
7583 instruct loadConL(rRegL dst, immL src)
7584 %{
7585 match(Set dst src);
7586
7587 ins_cost(150);
7588 format %{ "movq $dst, $src\t# long" %}
7589 ins_encode %{
7590 __ mov64($dst$$Register, $src$$constant);
7591 %}
7592 ins_pipe(ialu_reg);
7593 %}
7594
7595 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7596 %{
7597 match(Set dst src);
7598 effect(KILL cr);
7599
7600 ins_cost(50);
7601 format %{ "xorl $dst, $dst\t# long" %}
7602 ins_encode %{
7603 __ xorl($dst$$Register, $dst$$Register);
7604 %}
7605 ins_pipe(ialu_reg); // XXX
7606 %}
7607
7608 instruct loadConUL32(rRegL dst, immUL32 src)
7609 %{
7610 match(Set dst src);
7611
7612 ins_cost(60);
7613 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7614 ins_encode %{
7615 __ movl($dst$$Register, $src$$constant);
7616 %}
7617 ins_pipe(ialu_reg);
7618 %}
7619
7620 instruct loadConL32(rRegL dst, immL32 src)
7621 %{
7622 match(Set dst src);
7623
7624 ins_cost(70);
7625 format %{ "movq $dst, $src\t# long (32-bit)" %}
7626 ins_encode %{
7627 __ movq($dst$$Register, $src$$constant);
7628 %}
7629 ins_pipe(ialu_reg);
7630 %}
7631
7632 instruct loadConP(rRegP dst, immP con) %{
7633 match(Set dst con);
7634
7635 format %{ "movq $dst, $con\t# ptr" %}
7636 ins_encode %{
7637 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7638 %}
7639 ins_pipe(ialu_reg_fat); // XXX
7640 %}
7641
7642 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7643 %{
7644 match(Set dst src);
7645 effect(KILL cr);
7646
7647 ins_cost(50);
7648 format %{ "xorl $dst, $dst\t# ptr" %}
7649 ins_encode %{
7650 __ xorl($dst$$Register, $dst$$Register);
7651 %}
7652 ins_pipe(ialu_reg);
7653 %}
7654
7655 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7656 %{
7657 match(Set dst src);
7658 effect(KILL cr);
7659
7660 ins_cost(60);
7661 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7662 ins_encode %{
7663 __ movl($dst$$Register, $src$$constant);
7664 %}
7665 ins_pipe(ialu_reg);
7666 %}
7667
7668 instruct loadConF(regF dst, immF con) %{
7669 match(Set dst con);
7670 ins_cost(125);
7671 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7672 ins_encode %{
7673 __ movflt($dst$$XMMRegister, $constantaddress($con));
7674 %}
7675 ins_pipe(pipe_slow);
7676 %}
7677
7678 instruct loadConH(regF dst, immH con) %{
7679 match(Set dst con);
7680 ins_cost(125);
7681 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7682 ins_encode %{
7683 __ movflt($dst$$XMMRegister, $constantaddress($con));
7684 %}
7685 ins_pipe(pipe_slow);
7686 %}
7687
7688 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7689 match(Set dst src);
7690 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7692 ins_encode %{
7693 __ xorq($dst$$Register, $dst$$Register);
7694 %}
7695 ins_pipe(ialu_reg);
7696 %}
7697
7698 instruct loadConN(rRegN dst, immN src) %{
7699 match(Set dst src);
7700
7701 ins_cost(125);
7702 format %{ "movl $dst, $src\t# compressed ptr" %}
7703 ins_encode %{
7704 address con = (address)$src$$constant;
7705 if (con == nullptr) {
7706 ShouldNotReachHere();
7707 } else {
7708 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7709 }
7710 %}
7711 ins_pipe(ialu_reg_fat); // XXX
7712 %}
7713
7714 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7715 match(Set dst src);
7716
7717 ins_cost(125);
7718 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7719 ins_encode %{
7720 address con = (address)$src$$constant;
7721 if (con == nullptr) {
7722 ShouldNotReachHere();
7723 } else {
7724 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7725 }
7726 %}
7727 ins_pipe(ialu_reg_fat); // XXX
7728 %}
7729
7730 instruct loadConF0(regF dst, immF0 src)
7731 %{
7732 match(Set dst src);
7733 ins_cost(100);
7734
7735 format %{ "xorps $dst, $dst\t# float 0.0" %}
7736 ins_encode %{
7737 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7738 %}
7739 ins_pipe(pipe_slow);
7740 %}
7741
// Use the same format since predicate() cannot be used here.
7743 instruct loadConD(regD dst, immD con) %{
7744 match(Set dst con);
7745 ins_cost(125);
7746 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7747 ins_encode %{
7748 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7749 %}
7750 ins_pipe(pipe_slow);
7751 %}
7752
7753 instruct loadConD0(regD dst, immD0 src)
7754 %{
7755 match(Set dst src);
7756 ins_cost(100);
7757
7758 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7759 ins_encode %{
7760 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7761 %}
7762 ins_pipe(pipe_slow);
7763 %}
7764
7765 instruct loadSSI(rRegI dst, stackSlotI src)
7766 %{
7767 match(Set dst src);
7768
7769 ins_cost(125);
7770 format %{ "movl $dst, $src\t# int stk" %}
7771 ins_encode %{
7772 __ movl($dst$$Register, $src$$Address);
7773 %}
7774 ins_pipe(ialu_reg_mem);
7775 %}
7776
7777 instruct loadSSL(rRegL dst, stackSlotL src)
7778 %{
7779 match(Set dst src);
7780
7781 ins_cost(125);
7782 format %{ "movq $dst, $src\t# long stk" %}
7783 ins_encode %{
7784 __ movq($dst$$Register, $src$$Address);
7785 %}
7786 ins_pipe(ialu_reg_mem);
7787 %}
7788
7789 instruct loadSSP(rRegP dst, stackSlotP src)
7790 %{
7791 match(Set dst src);
7792
7793 ins_cost(125);
7794 format %{ "movq $dst, $src\t# ptr stk" %}
7795 ins_encode %{
7796 __ movq($dst$$Register, $src$$Address);
7797 %}
7798 ins_pipe(ialu_reg_mem);
7799 %}
7800
7801 instruct loadSSF(regF dst, stackSlotF src)
7802 %{
7803 match(Set dst src);
7804
7805 ins_cost(125);
7806 format %{ "movss $dst, $src\t# float stk" %}
7807 ins_encode %{
7808 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7809 %}
7810 ins_pipe(pipe_slow); // XXX
7811 %}
7812
// Use the same format since predicate() cannot be used here.
7814 instruct loadSSD(regD dst, stackSlotD src)
7815 %{
7816 match(Set dst src);
7817
7818 ins_cost(125);
7819 format %{ "movsd $dst, $src\t# double stk" %}
7820 ins_encode %{
7821 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7822 %}
7823 ins_pipe(pipe_slow); // XXX
7824 %}
7825
7826 // Prefetch instructions for allocation.
7827 // Must be safe to execute with invalid address (cannot fault).
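// AllocatePrefetchInstr selects the flavor, matching the predicates below:
// 0 => prefetchnta, 1 => prefetcht0, 2 => prefetcht2, 3 => prefetchw.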
7828
7829 instruct prefetchAlloc( memory mem ) %{
7830 predicate(AllocatePrefetchInstr==3);
7831 match(PrefetchAllocation mem);
7832 ins_cost(125);
7833
7834 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7835 ins_encode %{
7836 __ prefetchw($mem$$Address);
7837 %}
7838 ins_pipe(ialu_mem);
7839 %}
7840
7841 instruct prefetchAllocNTA( memory mem ) %{
7842 predicate(AllocatePrefetchInstr==0);
7843 match(PrefetchAllocation mem);
7844 ins_cost(125);
7845
7846 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7847 ins_encode %{
7848 __ prefetchnta($mem$$Address);
7849 %}
7850 ins_pipe(ialu_mem);
7851 %}
7852
7853 instruct prefetchAllocT0( memory mem ) %{
7854 predicate(AllocatePrefetchInstr==1);
7855 match(PrefetchAllocation mem);
7856 ins_cost(125);
7857
7858 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7859 ins_encode %{
7860 __ prefetcht0($mem$$Address);
7861 %}
7862 ins_pipe(ialu_mem);
7863 %}
7864
7865 instruct prefetchAllocT2( memory mem ) %{
7866 predicate(AllocatePrefetchInstr==2);
7867 match(PrefetchAllocation mem);
7868 ins_cost(125);
7869
7870 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7871 ins_encode %{
7872 __ prefetcht2($mem$$Address);
7873 %}
7874 ins_pipe(ialu_mem);
7875 %}
7876
7877 //----------Store Instructions-------------------------------------------------
7878
7879 // Store Byte
7880 instruct storeB(memory mem, rRegI src)
7881 %{
7882 match(Set mem (StoreB mem src));
7883
7884 ins_cost(125); // XXX
7885 format %{ "movb $mem, $src\t# byte" %}
7886 ins_encode %{
7887 __ movb($mem$$Address, $src$$Register);
7888 %}
7889 ins_pipe(ialu_mem_reg);
7890 %}
7891
7892 // Store Char/Short
7893 instruct storeC(memory mem, rRegI src)
7894 %{
7895 match(Set mem (StoreC mem src));
7896
7897 ins_cost(125); // XXX
7898 format %{ "movw $mem, $src\t# char/short" %}
7899 ins_encode %{
7900 __ movw($mem$$Address, $src$$Register);
7901 %}
7902 ins_pipe(ialu_mem_reg);
7903 %}
7904
7905 // Store Integer
7906 instruct storeI(memory mem, rRegI src)
7907 %{
7908 match(Set mem (StoreI mem src));
7909
7910 ins_cost(125); // XXX
7911 format %{ "movl $mem, $src\t# int" %}
7912 ins_encode %{
7913 __ movl($mem$$Address, $src$$Register);
7914 %}
7915 ins_pipe(ialu_mem_reg);
7916 %}
7917
7918 // Store Long
7919 instruct storeL(memory mem, rRegL src)
7920 %{
7921 match(Set mem (StoreL mem src));
7922
7923 ins_cost(125); // XXX
7924 format %{ "movq $mem, $src\t# long" %}
7925 ins_encode %{
7926 __ movq($mem$$Address, $src$$Register);
7927 %}
7928 ins_pipe(ialu_mem_reg); // XXX
7929 %}
7930
7931 // Store Pointer
7932 instruct storeP(memory mem, any_RegP src)
7933 %{
7934 predicate(n->as_Store()->barrier_data() == 0);
7935 match(Set mem (StoreP mem src));
7936
7937 ins_cost(125); // XXX
7938 format %{ "movq $mem, $src\t# ptr" %}
7939 ins_encode %{
7940 __ movq($mem$$Address, $src$$Register);
7941 %}
7942 ins_pipe(ialu_mem_reg);
7943 %}
7944
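// When compressed oops are in use with a null base, r12 (the heap-base
// register) permanently holds zero, so the zero-store rules below use it as a
// register source: that encodes more compactly than a store of an immediate.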
7945 instruct storeImmP0(memory mem, immP0 zero)
7946 %{
7947 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
7948 match(Set mem (StoreP mem zero));
7949
7950 ins_cost(125); // XXX
7951 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
7952 ins_encode %{
7953 __ movq($mem$$Address, r12);
7954 %}
7955 ins_pipe(ialu_mem_reg);
7956 %}
7957
7958 // Store Null Pointer, mark word, or other simple pointer constant.
7959 instruct storeImmP(memory mem, immP31 src)
7960 %{
7961 predicate(n->as_Store()->barrier_data() == 0);
7962 match(Set mem (StoreP mem src));
7963
7964 ins_cost(150); // XXX
7965 format %{ "movq $mem, $src\t# ptr" %}
7966 ins_encode %{
7967 __ movq($mem$$Address, $src$$constant);
7968 %}
7969 ins_pipe(ialu_mem_imm);
7970 %}
7971
7972 // Store Compressed Pointer
7973 instruct storeN(memory mem, rRegN src)
7974 %{
7975 predicate(n->as_Store()->barrier_data() == 0);
7976 match(Set mem (StoreN mem src));
7977
7978 ins_cost(125); // XXX
7979 format %{ "movl $mem, $src\t# compressed ptr" %}
7980 ins_encode %{
7981 __ movl($mem$$Address, $src$$Register);
7982 %}
7983 ins_pipe(ialu_mem_reg);
7984 %}
7985
7986 instruct storeNKlass(memory mem, rRegN src)
7987 %{
7988 match(Set mem (StoreNKlass mem src));
7989
7990 ins_cost(125); // XXX
7991 format %{ "movl $mem, $src\t# compressed klass ptr" %}
7992 ins_encode %{
7993 __ movl($mem$$Address, $src$$Register);
7994 %}
7995 ins_pipe(ialu_mem_reg);
7996 %}
7997
7998 instruct storeImmN0(memory mem, immN0 zero)
7999 %{
8000 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8001 match(Set mem (StoreN mem zero));
8002
8003 ins_cost(125); // XXX
8004 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8005 ins_encode %{
8006 __ movl($mem$$Address, r12);
8007 %}
8008 ins_pipe(ialu_mem_reg);
8009 %}
8010
8011 instruct storeImmN(memory mem, immN src)
8012 %{
8013 predicate(n->as_Store()->barrier_data() == 0);
8014 match(Set mem (StoreN mem src));
8015
8016 ins_cost(150); // XXX
8017 format %{ "movl $mem, $src\t# compressed ptr" %}
8018 ins_encode %{
8019 address con = (address)$src$$constant;
8020 if (con == nullptr) {
8021 __ movl($mem$$Address, 0);
8022 } else {
8023 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8024 }
8025 %}
8026 ins_pipe(ialu_mem_imm);
8027 %}
8028
8029 instruct storeImmNKlass(memory mem, immNKlass src)
8030 %{
8031 match(Set mem (StoreNKlass mem src));
8032
8033 ins_cost(150); // XXX
8034 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8035 ins_encode %{
8036 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8037 %}
8038 ins_pipe(ialu_mem_imm);
8039 %}
8040
8041 // Store Integer Immediate
8042 instruct storeImmI0(memory mem, immI_0 zero)
8043 %{
8044 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8045 match(Set mem (StoreI mem zero));
8046
8047 ins_cost(125); // XXX
8048 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8049 ins_encode %{
8050 __ movl($mem$$Address, r12);
8051 %}
8052 ins_pipe(ialu_mem_reg);
8053 %}
8054
8055 instruct storeImmI(memory mem, immI src)
8056 %{
8057 match(Set mem (StoreI mem src));
8058
8059 ins_cost(150);
8060 format %{ "movl $mem, $src\t# int" %}
8061 ins_encode %{
8062 __ movl($mem$$Address, $src$$constant);
8063 %}
8064 ins_pipe(ialu_mem_imm);
8065 %}
8066
8067 // Store Long Immediate
8068 instruct storeImmL0(memory mem, immL0 zero)
8069 %{
8070 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8071 match(Set mem (StoreL mem zero));
8072
8073 ins_cost(125); // XXX
8074 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8075 ins_encode %{
8076 __ movq($mem$$Address, r12);
8077 %}
8078 ins_pipe(ialu_mem_reg);
8079 %}
8080
8081 instruct storeImmL(memory mem, immL32 src)
8082 %{
8083 match(Set mem (StoreL mem src));
8084
8085 ins_cost(150);
8086 format %{ "movq $mem, $src\t# long" %}
8087 ins_encode %{
8088 __ movq($mem$$Address, $src$$constant);
8089 %}
8090 ins_pipe(ialu_mem_imm);
8091 %}
8092
8093 // Store Short/Char Immediate
8094 instruct storeImmC0(memory mem, immI_0 zero)
8095 %{
8096 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8097 match(Set mem (StoreC mem zero));
8098
8099 ins_cost(125); // XXX
8100 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8101 ins_encode %{
8102 __ movw($mem$$Address, r12);
8103 %}
8104 ins_pipe(ialu_mem_reg);
8105 %}
8106
8107 instruct storeImmI16(memory mem, immI16 src)
8108 %{
8109 predicate(UseStoreImmI16);
8110 match(Set mem (StoreC mem src));
8111
8112 ins_cost(150);
8113 format %{ "movw $mem, $src\t# short/char" %}
8114 ins_encode %{
8115 __ movw($mem$$Address, $src$$constant);
8116 %}
8117 ins_pipe(ialu_mem_imm);
8118 %}
8119
8120 // Store Byte Immediate
8121 instruct storeImmB0(memory mem, immI_0 zero)
8122 %{
8123 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8124 match(Set mem (StoreB mem zero));
8125
8126 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8128 ins_encode %{
8129 __ movb($mem$$Address, r12);
8130 %}
8131 ins_pipe(ialu_mem_reg);
8132 %}
8133
8134 instruct storeImmB(memory mem, immI8 src)
8135 %{
8136 match(Set mem (StoreB mem src));
8137
8138 ins_cost(150); // XXX
8139 format %{ "movb $mem, $src\t# byte" %}
8140 ins_encode %{
8141 __ movb($mem$$Address, $src$$constant);
8142 %}
8143 ins_pipe(ialu_mem_imm);
8144 %}
8145
8146 // Store Float
8147 instruct storeF(memory mem, regF src)
8148 %{
8149 match(Set mem (StoreF mem src));
8150
8151 ins_cost(95); // XXX
8152 format %{ "movss $mem, $src\t# float" %}
8153 ins_encode %{
8154 __ movflt($mem$$Address, $src$$XMMRegister);
8155 %}
8156 ins_pipe(pipe_slow); // XXX
8157 %}
8158
// Store immediate float 0.0 (faster than a store from an XMM register)
8160 instruct storeF0(memory mem, immF0 zero)
8161 %{
8162 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8163 match(Set mem (StoreF mem zero));
8164
8165 ins_cost(25); // XXX
8166 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8167 ins_encode %{
8168 __ movl($mem$$Address, r12);
8169 %}
8170 ins_pipe(ialu_mem_reg);
8171 %}
8172
8173 instruct storeF_imm(memory mem, immF src)
8174 %{
8175 match(Set mem (StoreF mem src));
8176
8177 ins_cost(50);
8178 format %{ "movl $mem, $src\t# float" %}
8179 ins_encode %{
8180 __ movl($mem$$Address, jint_cast($src$$constant));
8181 %}
8182 ins_pipe(ialu_mem_imm);
8183 %}
8184
8185 // Store Double
8186 instruct storeD(memory mem, regD src)
8187 %{
8188 match(Set mem (StoreD mem src));
8189
8190 ins_cost(95); // XXX
8191 format %{ "movsd $mem, $src\t# double" %}
8192 ins_encode %{
8193 __ movdbl($mem$$Address, $src$$XMMRegister);
8194 %}
8195 ins_pipe(pipe_slow); // XXX
8196 %}
8197
// Store immediate double 0.0 (faster than a store from an XMM register)
8199 instruct storeD0_imm(memory mem, immD0 src)
8200 %{
8201 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8202 match(Set mem (StoreD mem src));
8203
8204 ins_cost(50);
8205 format %{ "movq $mem, $src\t# double 0." %}
8206 ins_encode %{
8207 __ movq($mem$$Address, $src$$constant);
8208 %}
8209 ins_pipe(ialu_mem_imm);
8210 %}
8211
8212 instruct storeD0(memory mem, immD0 zero)
8213 %{
8214 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8215 match(Set mem (StoreD mem zero));
8216
8217 ins_cost(25); // XXX
8218 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8219 ins_encode %{
8220 __ movq($mem$$Address, r12);
8221 %}
8222 ins_pipe(ialu_mem_reg);
8223 %}
8224
8225 instruct storeSSI(stackSlotI dst, rRegI src)
8226 %{
8227 match(Set dst src);
8228
8229 ins_cost(100);
8230 format %{ "movl $dst, $src\t# int stk" %}
8231 ins_encode %{
8232 __ movl($dst$$Address, $src$$Register);
8233 %}
8234 ins_pipe( ialu_mem_reg );
8235 %}
8236
8237 instruct storeSSL(stackSlotL dst, rRegL src)
8238 %{
8239 match(Set dst src);
8240
8241 ins_cost(100);
8242 format %{ "movq $dst, $src\t# long stk" %}
8243 ins_encode %{
8244 __ movq($dst$$Address, $src$$Register);
8245 %}
8246 ins_pipe(ialu_mem_reg);
8247 %}
8248
8249 instruct storeSSP(stackSlotP dst, rRegP src)
8250 %{
8251 match(Set dst src);
8252
8253 ins_cost(100);
8254 format %{ "movq $dst, $src\t# ptr stk" %}
8255 ins_encode %{
8256 __ movq($dst$$Address, $src$$Register);
8257 %}
8258 ins_pipe(ialu_mem_reg);
8259 %}
8260
8261 instruct storeSSF(stackSlotF dst, regF src)
8262 %{
8263 match(Set dst src);
8264
8265 ins_cost(95); // XXX
8266 format %{ "movss $dst, $src\t# float stk" %}
8267 ins_encode %{
8268 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8269 %}
8270 ins_pipe(pipe_slow); // XXX
8271 %}
8272
8273 instruct storeSSD(stackSlotD dst, regD src)
8274 %{
8275 match(Set dst src);
8276
8277 ins_cost(95); // XXX
8278 format %{ "movsd $dst, $src\t# double stk" %}
8279 ins_encode %{
8280 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8281 %}
8282 ins_pipe(pipe_slow); // XXX
8283 %}
8284
8285 instruct cacheWB(indirect addr)
8286 %{
8287 predicate(VM_Version::supports_data_cache_line_flush());
8288 match(CacheWB addr);
8289
8290 ins_cost(100);
  format %{ "cache wb $addr" %}
8292 ins_encode %{
8293 assert($addr->index_position() < 0, "should be");
8294 assert($addr$$disp == 0, "should be");
8295 __ cache_wb(Address($addr$$base$$Register, 0));
8296 %}
8297 ins_pipe(pipe_slow); // XXX
8298 %}
8299
8300 instruct cacheWBPreSync()
8301 %{
8302 predicate(VM_Version::supports_data_cache_line_flush());
8303 match(CacheWBPreSync);
8304
8305 ins_cost(100);
  format %{ "cache wb presync" %}
8307 ins_encode %{
8308 __ cache_wbsync(true);
8309 %}
8310 ins_pipe(pipe_slow); // XXX
8311 %}
8312
8313 instruct cacheWBPostSync()
8314 %{
8315 predicate(VM_Version::supports_data_cache_line_flush());
8316 match(CacheWBPostSync);
8317
8318 ins_cost(100);
  format %{ "cache wb postsync" %}
8320 ins_encode %{
8321 __ cache_wbsync(false);
8322 %}
8323 ins_pipe(pipe_slow); // XXX
8324 %}
8325
8326 //----------BSWAP Instructions-------------------------------------------------
8327 instruct bytes_reverse_int(rRegI dst) %{
8328 match(Set dst (ReverseBytesI dst));
8329
8330 format %{ "bswapl $dst" %}
8331 ins_encode %{
8332 __ bswapl($dst$$Register);
8333 %}
8334 ins_pipe( ialu_reg );
8335 %}
8336
8337 instruct bytes_reverse_long(rRegL dst) %{
8338 match(Set dst (ReverseBytesL dst));
8339
8340 format %{ "bswapq $dst" %}
8341 ins_encode %{
8342 __ bswapq($dst$$Register);
8343 %}
8344 ins_pipe( ialu_reg);
8345 %}
8346
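// The 16-bit reversals byte-swap the whole 32-bit register and then shift the
// two interesting bytes back down: a logical shift (shrl) zero-extends for the
// unsigned case, while an arithmetic shift (sarl) sign-extends for the signed
// one.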
8347 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8348 match(Set dst (ReverseBytesUS dst));
8349 effect(KILL cr);
8350
  format %{ "bswapl $dst\n\t"
            "shrl $dst,16" %}
8353 ins_encode %{
8354 __ bswapl($dst$$Register);
8355 __ shrl($dst$$Register, 16);
8356 %}
8357 ins_pipe( ialu_reg );
8358 %}
8359
8360 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8361 match(Set dst (ReverseBytesS dst));
8362 effect(KILL cr);
8363
  format %{ "bswapl $dst\n\t"
            "sarl $dst,16" %}
8366 ins_encode %{
8367 __ bswapl($dst$$Register);
8368 __ sarl($dst$$Register, 16);
8369 %}
8370 ins_pipe( ialu_reg );
8371 %}
8372
8373 //---------- Zeros Count Instructions ------------------------------------------
8374
8375 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8376 predicate(UseCountLeadingZerosInstruction);
8377 match(Set dst (CountLeadingZerosI src));
8378 effect(KILL cr);
8379
8380 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8381 ins_encode %{
8382 __ lzcntl($dst$$Register, $src$$Register);
8383 %}
8384 ins_pipe(ialu_reg);
8385 %}
8386
8387 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8388 predicate(UseCountLeadingZerosInstruction);
8389 match(Set dst (CountLeadingZerosI (LoadI src)));
8390 effect(KILL cr);
8391 ins_cost(175);
8392 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8393 ins_encode %{
8394 __ lzcntl($dst$$Register, $src$$Address);
8395 %}
8396 ins_pipe(ialu_reg_mem);
8397 %}
8398
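// Fallback when lzcnt is unavailable: for x != 0, bsr yields the index of the
// highest set bit, so the leading-zero count is 31 - bsr(x), computed below as
// neg(bsr(x)) + 31. For x == 0, bsr leaves the destination undefined (with ZF
// set), hence the branch that substitutes -1 and makes the result 32. The long
// variant further down is analogous with 63/64.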
8399 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8400 predicate(!UseCountLeadingZerosInstruction);
8401 match(Set dst (CountLeadingZerosI src));
8402 effect(KILL cr);
8403
8404 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8405 "jnz skip\n\t"
8406 "movl $dst, -1\n"
8407 "skip:\n\t"
8408 "negl $dst\n\t"
8409 "addl $dst, 31" %}
8410 ins_encode %{
8411 Register Rdst = $dst$$Register;
8412 Register Rsrc = $src$$Register;
8413 Label skip;
8414 __ bsrl(Rdst, Rsrc);
8415 __ jccb(Assembler::notZero, skip);
8416 __ movl(Rdst, -1);
8417 __ bind(skip);
8418 __ negl(Rdst);
8419 __ addl(Rdst, BitsPerInt - 1);
8420 %}
8421 ins_pipe(ialu_reg);
8422 %}
8423
8424 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8425 predicate(UseCountLeadingZerosInstruction);
8426 match(Set dst (CountLeadingZerosL src));
8427 effect(KILL cr);
8428
8429 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8430 ins_encode %{
8431 __ lzcntq($dst$$Register, $src$$Register);
8432 %}
8433 ins_pipe(ialu_reg);
8434 %}
8435
8436 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8437 predicate(UseCountLeadingZerosInstruction);
8438 match(Set dst (CountLeadingZerosL (LoadL src)));
8439 effect(KILL cr);
8440 ins_cost(175);
8441 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8442 ins_encode %{
8443 __ lzcntq($dst$$Register, $src$$Address);
8444 %}
8445 ins_pipe(ialu_reg_mem);
8446 %}
8447
8448 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8449 predicate(!UseCountLeadingZerosInstruction);
8450 match(Set dst (CountLeadingZerosL src));
8451 effect(KILL cr);
8452
8453 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8454 "jnz skip\n\t"
8455 "movl $dst, -1\n"
8456 "skip:\n\t"
8457 "negl $dst\n\t"
8458 "addl $dst, 63" %}
8459 ins_encode %{
8460 Register Rdst = $dst$$Register;
8461 Register Rsrc = $src$$Register;
8462 Label skip;
8463 __ bsrq(Rdst, Rsrc);
8464 __ jccb(Assembler::notZero, skip);
8465 __ movl(Rdst, -1);
8466 __ bind(skip);
8467 __ negl(Rdst);
8468 __ addl(Rdst, BitsPerLong - 1);
8469 %}
8470 ins_pipe(ialu_reg);
8471 %}
8472
8473 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8474 predicate(UseCountTrailingZerosInstruction);
8475 match(Set dst (CountTrailingZerosI src));
8476 effect(KILL cr);
8477
8478 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8479 ins_encode %{
8480 __ tzcntl($dst$$Register, $src$$Register);
8481 %}
8482 ins_pipe(ialu_reg);
8483 %}
8484
8485 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8486 predicate(UseCountTrailingZerosInstruction);
8487 match(Set dst (CountTrailingZerosI (LoadI src)));
8488 effect(KILL cr);
8489 ins_cost(175);
8490 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8491 ins_encode %{
8492 __ tzcntl($dst$$Register, $src$$Address);
8493 %}
8494 ins_pipe(ialu_reg_mem);
8495 %}
8496
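// Fallback when tzcnt is unavailable: for x != 0, bsf yields the index of the
// lowest set bit, which is exactly the trailing-zero count; for x == 0 the
// result is undefined, so the branch substitutes BitsPerInt (BitsPerLong in
// the 64-bit variant).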
8497 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8498 predicate(!UseCountTrailingZerosInstruction);
8499 match(Set dst (CountTrailingZerosI src));
8500 effect(KILL cr);
8501
8502 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8503 "jnz done\n\t"
8504 "movl $dst, 32\n"
8505 "done:" %}
8506 ins_encode %{
8507 Register Rdst = $dst$$Register;
8508 Label done;
8509 __ bsfl(Rdst, $src$$Register);
8510 __ jccb(Assembler::notZero, done);
8511 __ movl(Rdst, BitsPerInt);
8512 __ bind(done);
8513 %}
8514 ins_pipe(ialu_reg);
8515 %}
8516
8517 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8518 predicate(UseCountTrailingZerosInstruction);
8519 match(Set dst (CountTrailingZerosL src));
8520 effect(KILL cr);
8521
8522 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8523 ins_encode %{
8524 __ tzcntq($dst$$Register, $src$$Register);
8525 %}
8526 ins_pipe(ialu_reg);
8527 %}
8528
8529 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8530 predicate(UseCountTrailingZerosInstruction);
8531 match(Set dst (CountTrailingZerosL (LoadL src)));
8532 effect(KILL cr);
8533 ins_cost(175);
8534 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8535 ins_encode %{
8536 __ tzcntq($dst$$Register, $src$$Address);
8537 %}
8538 ins_pipe(ialu_reg_mem);
8539 %}
8540
8541 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8542 predicate(!UseCountTrailingZerosInstruction);
8543 match(Set dst (CountTrailingZerosL src));
8544 effect(KILL cr);
8545
8546 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8547 "jnz done\n\t"
8548 "movl $dst, 64\n"
8549 "done:" %}
8550 ins_encode %{
8551 Register Rdst = $dst$$Register;
8552 Label done;
8553 __ bsfq(Rdst, $src$$Register);
8554 __ jccb(Assembler::notZero, done);
8555 __ movl(Rdst, BitsPerLong);
8556 __ bind(done);
8557 %}
8558 ins_pipe(ialu_reg);
8559 %}
8560
8561 //--------------- Reverse Operation Instructions ----------------
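// Each width comes in two flavors: a GPR-only fallback and, when GFNI is
// available, a form that routes through XMM temporaries so the per-byte bit
// reversal can be done with a Galois-field affine instruction.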
8562 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8563 predicate(!VM_Version::supports_gfni());
8564 match(Set dst (ReverseI src));
8565 effect(TEMP dst, TEMP rtmp, KILL cr);
8566 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8567 ins_encode %{
8568 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8569 %}
8570 ins_pipe( ialu_reg );
8571 %}
8572
8573 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8574 predicate(VM_Version::supports_gfni());
8575 match(Set dst (ReverseI src));
8576 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8577 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8578 ins_encode %{
8579 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8580 %}
8581 ins_pipe( ialu_reg );
8582 %}
8583
8584 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8585 predicate(!VM_Version::supports_gfni());
8586 match(Set dst (ReverseL src));
8587 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8588 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8589 ins_encode %{
8590 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8591 %}
8592 ins_pipe( ialu_reg );
8593 %}
8594
8595 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8596 predicate(VM_Version::supports_gfni());
8597 match(Set dst (ReverseL src));
8598 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8599 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8600 ins_encode %{
8601 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8602 %}
8603 ins_pipe( ialu_reg );
8604 %}
8605
8606 //---------- Population Count Instructions -------------------------------------
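// popcnt computes the number of set bits. UsePopCountInstruction is only set
// when the CPU advertises POPCNT; without it these rules don't match and
// Integer/Long.bitCount remain ordinary Java code.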
8607
8608 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8609 predicate(UsePopCountInstruction);
8610 match(Set dst (PopCountI src));
8611 effect(KILL cr);
8612
8613 format %{ "popcnt $dst, $src" %}
8614 ins_encode %{
8615 __ popcntl($dst$$Register, $src$$Register);
8616 %}
8617 ins_pipe(ialu_reg);
8618 %}
8619
8620 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8621 predicate(UsePopCountInstruction);
8622 match(Set dst (PopCountI (LoadI mem)));
8623 effect(KILL cr);
8624
8625 format %{ "popcnt $dst, $mem" %}
8626 ins_encode %{
8627 __ popcntl($dst$$Register, $mem$$Address);
8628 %}
8629 ins_pipe(ialu_reg);
8630 %}
8631
8632 // Note: Long.bitCount(long) returns an int.
8633 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8634 predicate(UsePopCountInstruction);
8635 match(Set dst (PopCountL src));
8636 effect(KILL cr);
8637
8638 format %{ "popcnt $dst, $src" %}
8639 ins_encode %{
8640 __ popcntq($dst$$Register, $src$$Register);
8641 %}
8642 ins_pipe(ialu_reg);
8643 %}
8644
8645 // Note: Long.bitCount(long) returns an int.
8646 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8647 predicate(UsePopCountInstruction);
8648 match(Set dst (PopCountL (LoadL mem)));
8649 effect(KILL cr);
8650
8651 format %{ "popcnt $dst, $mem" %}
8652 ins_encode %{
8653 __ popcntq($dst$$Register, $mem$$Address);
8654 %}
8655 ins_pipe(ialu_reg);
8656 %}
8657
8658
8659 //----------MemBar Instructions-----------------------------------------------
8660 // Memory barrier flavors
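// x86 is TSO: the only hardware reordering visible to other processors is a
// later load passing an earlier store. Acquire, release and store-store
// barriers therefore need no instructions; only MemBarVolatile (store-load)
// requires a real fence, emitted below as a locked add to the top of stack,
// which is at least as strong as mfence and usually cheaper.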
8661
8662 instruct membar_acquire()
8663 %{
8664 match(MemBarAcquire);
8665 match(LoadFence);
8666 ins_cost(0);
8667
8668 size(0);
8669 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8670 ins_encode();
8671 ins_pipe(empty);
8672 %}
8673
8674 instruct membar_acquire_lock()
8675 %{
8676 match(MemBarAcquireLock);
8677 ins_cost(0);
8678
8679 size(0);
8680 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8681 ins_encode();
8682 ins_pipe(empty);
8683 %}
8684
8685 instruct membar_release()
8686 %{
8687 match(MemBarRelease);
8688 match(StoreFence);
8689 ins_cost(0);
8690
8691 size(0);
8692 format %{ "MEMBAR-release ! (empty encoding)" %}
8693 ins_encode();
8694 ins_pipe(empty);
8695 %}
8696
8697 instruct membar_release_lock()
8698 %{
8699 match(MemBarReleaseLock);
8700 ins_cost(0);
8701
8702 size(0);
8703 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8704 ins_encode();
8705 ins_pipe(empty);
8706 %}
8707
8708 instruct membar_volatile(rFlagsReg cr) %{
8709 match(MemBarVolatile);
8710 effect(KILL cr);
8711 ins_cost(400);
8712
8713 format %{
8714 $$template
8715 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8716 %}
8717 ins_encode %{
8718 __ membar(Assembler::StoreLoad);
8719 %}
8720 ins_pipe(pipe_slow);
8721 %}
8722
8723 instruct unnecessary_membar_volatile()
8724 %{
8725 match(MemBarVolatile);
8726 predicate(Matcher::post_store_load_barrier(n));
8727 ins_cost(0);
8728
8729 size(0);
8730 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8731 ins_encode();
8732 ins_pipe(empty);
8733 %}
8734
8735 instruct membar_storestore() %{
8736 match(MemBarStoreStore);
8737 match(StoreStoreFence);
8738 ins_cost(0);
8739
8740 size(0);
8741 format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
8743 ins_pipe(empty);
8744 %}
8745
8746 //----------Move Instructions--------------------------------------------------
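// CastX2P/CastP2X only reinterpret the value: pointers and longs share the
// same 64-bit register representation, so the move vanishes whenever the
// allocator assigns $dst and $src the same register.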
8747
8748 instruct castX2P(rRegP dst, rRegL src)
8749 %{
8750 match(Set dst (CastX2P src));
8751
8752 format %{ "movq $dst, $src\t# long->ptr" %}
8753 ins_encode %{
8754 if ($dst$$reg != $src$$reg) {
8755 __ movptr($dst$$Register, $src$$Register);
8756 }
8757 %}
8758 ins_pipe(ialu_reg_reg); // XXX
8759 %}
8760
8761 instruct castP2X(rRegL dst, rRegP src)
8762 %{
8763 match(Set dst (CastP2X src));
8764
8765 format %{ "movq $dst, $src\t# ptr -> long" %}
8766 ins_encode %{
8767 if ($dst$$reg != $src$$reg) {
8768 __ movptr($dst$$Register, $src$$Register);
8769 }
8770 %}
8771 ins_pipe(ialu_reg_reg); // XXX
8772 %}
8773
// Convert oop into int for vector alignment masking
8775 instruct convP2I(rRegI dst, rRegP src)
8776 %{
8777 match(Set dst (ConvL2I (CastP2X src)));
8778
8779 format %{ "movl $dst, $src\t# ptr -> int" %}
8780 ins_encode %{
8781 __ movl($dst$$Register, $src$$Register);
8782 %}
8783 ins_pipe(ialu_reg_reg); // XXX
8784 %}
8785
// Convert compressed oop into int for vector alignment masking
// in case of 32-bit oops (heap < 4Gb).
8788 instruct convN2I(rRegI dst, rRegN src)
8789 %{
8790 predicate(CompressedOops::shift() == 0);
8791 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8792
8793 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8794 ins_encode %{
8795 __ movl($dst$$Register, $src$$Register);
8796 %}
8797 ins_pipe(ialu_reg_reg); // XXX
8798 %}
8799
8800 // Convert oop pointer into compressed form
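// Conceptually narrow = (oop == nullptr) ? 0
//                     : (oop - CompressedOops::base()) >> CompressedOops::shift();
// DecodeN computes the inverse. With zero base and zero shift both directions
// degenerate to a plain 32-bit move.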
8801 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8802 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8803 match(Set dst (EncodeP src));
8804 effect(KILL cr);
8805 format %{ "encode_heap_oop $dst,$src" %}
8806 ins_encode %{
8807 Register s = $src$$Register;
8808 Register d = $dst$$Register;
8809 if (s != d) {
8810 __ movq(d, s);
8811 }
8812 __ encode_heap_oop(d);
8813 %}
8814 ins_pipe(ialu_reg_long);
8815 %}
8816
8817 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8818 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8819 match(Set dst (EncodeP src));
8820 effect(KILL cr);
8821 format %{ "encode_heap_oop_not_null $dst,$src" %}
8822 ins_encode %{
8823 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8824 %}
8825 ins_pipe(ialu_reg_long);
8826 %}
8827
8828 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8829 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8830 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8831 match(Set dst (DecodeN src));
8832 effect(KILL cr);
8833 format %{ "decode_heap_oop $dst,$src" %}
8834 ins_encode %{
8835 Register s = $src$$Register;
8836 Register d = $dst$$Register;
8837 if (s != d) {
8838 __ movq(d, s);
8839 }
8840 __ decode_heap_oop(d);
8841 %}
8842 ins_pipe(ialu_reg_long);
8843 %}
8844
8845 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8846 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8847 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8848 match(Set dst (DecodeN src));
8849 effect(KILL cr);
8850 format %{ "decode_heap_oop_not_null $dst,$src" %}
8851 ins_encode %{
8852 Register s = $src$$Register;
8853 Register d = $dst$$Register;
8854 if (s != d) {
8855 __ decode_heap_oop_not_null(d, s);
8856 } else {
8857 __ decode_heap_oop_not_null(d);
8858 }
8859 %}
8860 ins_pipe(ialu_reg_long);
8861 %}
8862
8863 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8864 match(Set dst (EncodePKlass src));
8865 effect(TEMP dst, KILL cr);
8866 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8867 ins_encode %{
8868 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8869 %}
8870 ins_pipe(ialu_reg_long);
8871 %}
8872
8873 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8874 match(Set dst (DecodeNKlass src));
8875 effect(TEMP dst, KILL cr);
8876 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8877 ins_encode %{
8878 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8879 %}
8880 ins_pipe(ialu_reg_long);
8881 %}
8882
8883 //----------Conditional Move---------------------------------------------------
8884 // Jump
8885 // dummy instruction for generating temp registers
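// These patterns match the Jump node produced for a tableswitch: the jump
// table lives in the constant section ($constantaddress), so dispatch loads
// the table base and does an indirect jmp through
// table[switch_val << shift (+ offset)].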
8886 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8887 match(Jump (LShiftL switch_val shift));
8888 ins_cost(350);
8889 predicate(false);
8890 effect(TEMP dest);
8891
8892 format %{ "leaq $dest, [$constantaddress]\n\t"
8893 "jmp [$dest + $switch_val << $shift]\n\t" %}
8894 ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // for that, and the compiler keeps r10 available for allocation here.
    // So we build the dispatch by hand.
8898 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
8899 // ArrayAddress dispatch(table, index);
8900 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
8901 __ lea($dest$$Register, $constantaddress);
8902 __ jmp(dispatch);
8903 %}
8904 ins_pipe(pipe_jmp);
8905 %}
8906
8907 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8908 match(Jump (AddL (LShiftL switch_val shift) offset));
8909 ins_cost(350);
8910 effect(TEMP dest);
8911
8912 format %{ "leaq $dest, [$constantaddress]\n\t"
8913 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
8914 ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // for that, and the compiler keeps r10 available for allocation here.
    // So we build the dispatch by hand.
8918 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8919 // ArrayAddress dispatch(table, index);
8920 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8921 __ lea($dest$$Register, $constantaddress);
8922 __ jmp(dispatch);
8923 %}
8924 ins_pipe(pipe_jmp);
8925 %}
8926
8927 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
8928 match(Jump switch_val);
8929 ins_cost(350);
8930 effect(TEMP dest);
8931
8932 format %{ "leaq $dest, [$constantaddress]\n\t"
8933 "jmp [$dest + $switch_val]\n\t" %}
8934 ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler would need r10
    // for that, and the compiler keeps r10 available for allocation here.
    // So we build the dispatch by hand.
8938 // Address index(noreg, switch_reg, Address::times_1);
8939 // ArrayAddress dispatch(table, index);
8940 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
8941 __ lea($dest$$Register, $constantaddress);
8942 __ jmp(dispatch);
8943 %}
8944 ins_pipe(pipe_jmp);
8945 %}
8946
8947 // Conditional move
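// A CMove whose two inputs are the constants 1 and 0 needs no cmov at all:
// setcc of the negated condition writes the 0/1 result directly (the imm_01
// patterns). The APX "ndd" (new data destination) variants use EVEX-encoded
// ecmov forms with a separate destination, dst = cond ? src2 : src1, avoiding
// the copy into dst that the two-operand cmov patterns require.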
8948 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
8949 %{
8950 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
8951 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
8952
8953 ins_cost(100); // XXX
8954 format %{ "setbn$cop $dst\t# signed, int" %}
8955 ins_encode %{
8956 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
8957 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
8958 %}
8959 ins_pipe(ialu_reg);
8960 %}
8961
8962 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
8963 %{
8964 predicate(!UseAPX);
8965 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8966
8967 ins_cost(200); // XXX
8968 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8969 ins_encode %{
8970 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
8971 %}
8972 ins_pipe(pipe_cmov_reg);
8973 %}
8974
8975 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
8976 %{
8977 predicate(UseAPX);
8978 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
8979
8980 ins_cost(200);
8981 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
8982 ins_encode %{
8983 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
8984 %}
8985 ins_pipe(pipe_cmov_reg);
8986 %}
8987
8988 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
8989 %{
8990 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
8991 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
8992
8993 ins_cost(100); // XXX
8994 format %{ "setbn$cop $dst\t# unsigned, int" %}
8995 ins_encode %{
8996 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
8997 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
8998 %}
8999 ins_pipe(ialu_reg);
9000 %}
9001
9002 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9003 predicate(!UseAPX);
9004 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9005
9006 ins_cost(200); // XXX
9007 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9008 ins_encode %{
9009 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9010 %}
9011 ins_pipe(pipe_cmov_reg);
9012 %}
9013
9014 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9015 predicate(UseAPX);
9016 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9017
9018 ins_cost(200);
9019 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9020 ins_encode %{
9021 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9022 %}
9023 ins_pipe(pipe_cmov_reg);
9024 %}
9025
9026 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9027 %{
9028 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9029 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9030
9031 ins_cost(100); // XXX
9032 format %{ "setbn$cop $dst\t# unsigned, int" %}
9033 ins_encode %{
9034 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9035 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9036 %}
9037 ins_pipe(ialu_reg);
9038 %}
9039
9040 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9041 predicate(!UseAPX);
9042 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9043 ins_cost(200);
9044 expand %{
9045 cmovI_regU(cop, cr, dst, src);
9046 %}
9047 %}
9048
9049 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9050 predicate(UseAPX);
9051 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9052 ins_cost(200);
9053 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9054 ins_encode %{
9055 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9056 %}
9057 ins_pipe(pipe_cmov_reg);
9058 %}
9059
9060 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9061 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9062 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9063
9064 ins_cost(200); // XXX
9065 format %{ "cmovpl $dst, $src\n\t"
9066 "cmovnel $dst, $src" %}
9067 ins_encode %{
9068 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9069 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9070 %}
9071 ins_pipe(pipe_cmov_reg);
9072 %}
9073
9074 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9075 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9076 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9077 effect(TEMP dst);
9078
9079 ins_cost(200);
9080 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9081 "cmovnel $dst, $src2" %}
9082 ins_encode %{
9083 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9084 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9085 %}
9086 ins_pipe(pipe_cmov_reg);
9087 %}
9088
9089 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9090 // inputs of the CMove
9091 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9092 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9093 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9094 effect(TEMP dst);
9095
9096 ins_cost(200); // XXX
9097 format %{ "cmovpl $dst, $src\n\t"
9098 "cmovnel $dst, $src" %}
9099 ins_encode %{
9100 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9101 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9102 %}
9103 ins_pipe(pipe_cmov_reg);
9104 %}
9105
// We need this special handling only for eq / ne comparisons, since
// NaN == NaN is false and the parity flag is set whenever either operand is NaN.
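// (ucomiss/ucomisd report unordered as ZF=PF=CF=1; the extra cmovp makes the
// NaN case take the not-equal value.)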
9108 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9109 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9110 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9111 effect(TEMP dst);
9112
9113 ins_cost(200);
9114 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9115 "cmovnel $dst, $src2" %}
9116 ins_encode %{
9117 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9118 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9119 %}
9120 ins_pipe(pipe_cmov_reg);
9121 %}
9122
9123 // Conditional move
9124 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9125 predicate(!UseAPX);
9126 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9127
9128 ins_cost(250); // XXX
9129 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9130 ins_encode %{
9131 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9132 %}
9133 ins_pipe(pipe_cmov_mem);
9134 %}
9135
9136 // Conditional move
9137 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9138 %{
9139 predicate(UseAPX);
9140 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9141
9142 ins_cost(250);
9143 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9144 ins_encode %{
9145 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9146 %}
9147 ins_pipe(pipe_cmov_mem);
9148 %}
9149
9150 // Conditional move
9151 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9152 %{
9153 predicate(!UseAPX);
9154 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9155
9156 ins_cost(250); // XXX
9157 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9158 ins_encode %{
9159 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9160 %}
9161 ins_pipe(pipe_cmov_mem);
9162 %}
9163
9164 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9165 predicate(!UseAPX);
9166 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9167 ins_cost(250);
9168 expand %{
9169 cmovI_memU(cop, cr, dst, src);
9170 %}
9171 %}
9172
9173 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9174 %{
9175 predicate(UseAPX);
9176 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9177
9178 ins_cost(250);
9179 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9180 ins_encode %{
9181 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9182 %}
9183 ins_pipe(pipe_cmov_mem);
9184 %}
9185
9186 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9187 %{
9188 predicate(UseAPX);
9189 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9190 ins_cost(250);
9191 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9192 ins_encode %{
9193 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9194 %}
9195 ins_pipe(pipe_cmov_mem);
9196 %}
9197
9198 // Conditional move
9199 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9200 %{
9201 predicate(!UseAPX);
9202 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9203
9204 ins_cost(200); // XXX
9205 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9206 ins_encode %{
9207 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9208 %}
9209 ins_pipe(pipe_cmov_reg);
9210 %}
9211
9212 // Conditional move ndd
9213 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9214 %{
9215 predicate(UseAPX);
9216 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9217
9218 ins_cost(200);
9219 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9220 ins_encode %{
9221 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9222 %}
9223 ins_pipe(pipe_cmov_reg);
9224 %}
9225
9226 // Conditional move
9227 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9228 %{
9229 predicate(!UseAPX);
9230 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9231
9232 ins_cost(200); // XXX
9233 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9234 ins_encode %{
9235 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9236 %}
9237 ins_pipe(pipe_cmov_reg);
9238 %}
9239
9240 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9241 predicate(!UseAPX);
9242 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9243 ins_cost(200);
9244 expand %{
9245 cmovN_regU(cop, cr, dst, src);
9246 %}
9247 %}
9248
9249 // Conditional move ndd
9250 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9251 %{
9252 predicate(UseAPX);
9253 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9254
9255 ins_cost(200);
9256 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9257 ins_encode %{
9258 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9259 %}
9260 ins_pipe(pipe_cmov_reg);
9261 %}
9262
9263 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9264 predicate(UseAPX);
9265 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9266 ins_cost(200);
9267 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9268 ins_encode %{
9269 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9270 %}
9271 ins_pipe(pipe_cmov_reg);
9272 %}
9273
9274 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9275 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9276 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9277
9278 ins_cost(200); // XXX
9279 format %{ "cmovpl $dst, $src\n\t"
9280 "cmovnel $dst, $src" %}
9281 ins_encode %{
9282 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9283 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9284 %}
9285 ins_pipe(pipe_cmov_reg);
9286 %}
9287
9288 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9289 // inputs of the CMove
9290 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9291 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9292 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9293
9294 ins_cost(200); // XXX
9295 format %{ "cmovpl $dst, $src\n\t"
9296 "cmovnel $dst, $src" %}
9297 ins_encode %{
9298 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9299 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9300 %}
9301 ins_pipe(pipe_cmov_reg);
9302 %}
9303
9304 // Conditional move
9305 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9306 %{
9307 predicate(!UseAPX);
9308 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9309
9310 ins_cost(200); // XXX
9311 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9312 ins_encode %{
9313 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9314 %}
9315 ins_pipe(pipe_cmov_reg); // XXX
9316 %}
9317
9318 // Conditional move ndd
9319 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9320 %{
9321 predicate(UseAPX);
9322 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9323
9324 ins_cost(200);
9325 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9326 ins_encode %{
9327 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9328 %}
9329 ins_pipe(pipe_cmov_reg);
9330 %}
9331
9332 // Conditional move
9333 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9334 %{
9335 predicate(!UseAPX);
9336 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9337
9338 ins_cost(200); // XXX
9339 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9340 ins_encode %{
9341 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9342 %}
9343 ins_pipe(pipe_cmov_reg); // XXX
9344 %}
9345
9346 // Conditional move ndd
9347 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9348 %{
9349 predicate(UseAPX);
9350 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9351
9352 ins_cost(200);
9353 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9354 ins_encode %{
9355 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9356 %}
9357 ins_pipe(pipe_cmov_reg);
9358 %}
9359
9360 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9361 predicate(!UseAPX);
9362 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9363 ins_cost(200);
9364 expand %{
9365 cmovP_regU(cop, cr, dst, src);
9366 %}
9367 %}
9368
9369 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9370 predicate(UseAPX);
9371 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9372 ins_cost(200);
9373 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9374 ins_encode %{
9375 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9376 %}
9377 ins_pipe(pipe_cmov_reg);
9378 %}
9379
9380 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9381 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9382 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9383
9384 ins_cost(200); // XXX
9385 format %{ "cmovpq $dst, $src\n\t"
9386 "cmovneq $dst, $src" %}
9387 ins_encode %{
9388 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9389 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9390 %}
9391 ins_pipe(pipe_cmov_reg);
9392 %}
9393
9394 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9395 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9396 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9397 effect(TEMP dst);
9398
9399 ins_cost(200);
9400 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9401 "cmovneq $dst, $src2" %}
9402 ins_encode %{
9403 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9404 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9405 %}
9406 ins_pipe(pipe_cmov_reg);
9407 %}
9408
9409 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9410 // inputs of the CMove
9411 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9412 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9413 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9414
9415 ins_cost(200); // XXX
9416 format %{ "cmovpq $dst, $src\n\t"
9417 "cmovneq $dst, $src" %}
9418 ins_encode %{
9419 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9420 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9421 %}
9422 ins_pipe(pipe_cmov_reg);
9423 %}
9424
9425 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9426 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9427 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9428 effect(TEMP dst);
9429
9430 ins_cost(200);
9431 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9432 "cmovneq $dst, $src2" %}
9433 ins_encode %{
9434 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9435 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9436 %}
9437 ins_pipe(pipe_cmov_reg);
9438 %}
9439
9440 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9441 %{
9442 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9443 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9444
9445 ins_cost(100); // XXX
9446 format %{ "setbn$cop $dst\t# signed, long" %}
9447 ins_encode %{
9448 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9449 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9450 %}
9451 ins_pipe(ialu_reg);
9452 %}
9453
9454 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9455 %{
9456 predicate(!UseAPX);
9457 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9458
9459 ins_cost(200); // XXX
9460 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9461 ins_encode %{
9462 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9463 %}
9464 ins_pipe(pipe_cmov_reg); // XXX
9465 %}
9466
9467 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9468 %{
9469 predicate(UseAPX);
9470 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9471
9472 ins_cost(200);
9473 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9474 ins_encode %{
9475 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9476 %}
9477 ins_pipe(pipe_cmov_reg);
9478 %}
9479
9480 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9481 %{
9482 predicate(!UseAPX);
9483 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9484
9485 ins_cost(200); // XXX
9486 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9487 ins_encode %{
9488 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9489 %}
9490 ins_pipe(pipe_cmov_mem); // XXX
9491 %}
9492
9493 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9494 %{
9495 predicate(UseAPX);
9496 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9497
9498 ins_cost(200);
9499 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9500 ins_encode %{
9501 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9502 %}
9503 ins_pipe(pipe_cmov_mem);
9504 %}
9505
9506 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9507 %{
9508 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9509 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9510
9511 ins_cost(100); // XXX
9512 format %{ "setbn$cop $dst\t# unsigned, long" %}
9513 ins_encode %{
9514 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9515 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9516 %}
9517 ins_pipe(ialu_reg);
9518 %}
9519
9520 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9521 %{
9522 predicate(!UseAPX);
9523 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9524
9525 ins_cost(200); // XXX
9526 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9527 ins_encode %{
9528 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9529 %}
9530 ins_pipe(pipe_cmov_reg); // XXX
9531 %}
9532
9533 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9534 %{
9535 predicate(UseAPX);
9536 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9537
9538 ins_cost(200);
9539 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9540 ins_encode %{
9541 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9542 %}
9543 ins_pipe(pipe_cmov_reg);
9544 %}
9545
9546 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9547 %{
9548 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9549 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9550
9551 ins_cost(100); // XXX
9552 format %{ "setbn$cop $dst\t# unsigned, long" %}
9553 ins_encode %{
9554 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9555 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9556 %}
9557 ins_pipe(ialu_reg);
9558 %}
9559
9560 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9561 predicate(!UseAPX);
9562 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9563 ins_cost(200);
9564 expand %{
9565 cmovL_regU(cop, cr, dst, src);
9566 %}
9567 %}
9568
9569 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9570 %{
9571 predicate(UseAPX);
9572 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9573 ins_cost(200);
9574 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9575 ins_encode %{
9576 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9577 %}
9578 ins_pipe(pipe_cmov_reg);
9579 %}
9580
9581 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9582 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9583 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9584
9585 ins_cost(200); // XXX
9586 format %{ "cmovpq $dst, $src\n\t"
9587 "cmovneq $dst, $src" %}
9588 ins_encode %{
9589 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9590 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9591 %}
9592 ins_pipe(pipe_cmov_reg);
9593 %}
9594
9595 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9596 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9597 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9598 effect(TEMP dst);
9599
9600 ins_cost(200);
9601 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9602 "cmovneq $dst, $src2" %}
9603 ins_encode %{
9604 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9605 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9606 %}
9607 ins_pipe(pipe_cmov_reg);
9608 %}
9609
9610 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9611 // inputs of the CMove
9612 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9613 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9614 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9615
9616 ins_cost(200); // XXX
9617 format %{ "cmovpq $dst, $src\n\t"
9618 "cmovneq $dst, $src" %}
9619 ins_encode %{
9620 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9621 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9622 %}
9623 ins_pipe(pipe_cmov_reg);
9624 %}
9625
9626 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9627 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9628 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9629 effect(TEMP dst);
9630
9631 ins_cost(200);
9632 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9633 "cmovneq $dst, $src2" %}
9634 ins_encode %{
9635 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9636 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9637 %}
9638 ins_pipe(pipe_cmov_reg);
9639 %}
9640
9641 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9642 %{
9643 predicate(!UseAPX);
9644 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9645
9646 ins_cost(200); // XXX
9647 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9648 ins_encode %{
9649 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9650 %}
9651 ins_pipe(pipe_cmov_mem); // XXX
9652 %}
9653
9654 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9655 predicate(!UseAPX);
9656 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9657 ins_cost(200);
9658 expand %{
9659 cmovL_memU(cop, cr, dst, src);
9660 %}
9661 %}
9662
9663 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9664 %{
9665 predicate(UseAPX);
9666 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9667
9668 ins_cost(200);
9669 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9670 ins_encode %{
9671 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9672 %}
9673 ins_pipe(pipe_cmov_mem);
9674 %}
9675
9676 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9677 %{
9678 predicate(UseAPX);
9679 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9680 ins_cost(200);
9681 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9682 ins_encode %{
9683 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9684 %}
9685 ins_pipe(pipe_cmov_mem);
9686 %}
9687
9688 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9689 %{
9690 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9691
9692 ins_cost(200); // XXX
9693 format %{ "jn$cop skip\t# signed cmove float\n\t"
9694 "movss $dst, $src\n"
9695 "skip:" %}
9696 ins_encode %{
9697 Label Lskip;
9698 // Invert sense of branch from sense of CMOV
9699 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9700 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9701 __ bind(Lskip);
9702 %}
9703 ins_pipe(pipe_slow);
9704 %}
9705
9706 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9707 %{
9708 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9709
9710 ins_cost(200); // XXX
9711 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9712 "movss $dst, $src\n"
9713 "skip:" %}
9714 ins_encode %{
9715 Label Lskip;
9716 // Invert sense of branch from sense of CMOV
9717 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9718 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9719 __ bind(Lskip);
9720 %}
9721 ins_pipe(pipe_slow);
9722 %}
9723
9724 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9725 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9726 ins_cost(200);
9727 expand %{
9728 cmovF_regU(cop, cr, dst, src);
9729 %}
9730 %}
9731
9732 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9733 %{
9734 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9735
9736 ins_cost(200); // XXX
9737 format %{ "jn$cop skip\t# signed cmove double\n\t"
9738 "movsd $dst, $src\n"
9739 "skip:" %}
9740 ins_encode %{
9741 Label Lskip;
9742 // Invert sense of branch from sense of CMOV
9743 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9744 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9745 __ bind(Lskip);
9746 %}
9747 ins_pipe(pipe_slow);
9748 %}
9749
9750 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9751 %{
9752 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9753
9754 ins_cost(200); // XXX
9755 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9756 "movsd $dst, $src\n"
9757 "skip:" %}
9758 ins_encode %{
9759 Label Lskip;
9760 // Invert sense of branch from sense of CMOV
9761 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9762 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9763 __ bind(Lskip);
9764 %}
9765 ins_pipe(pipe_slow);
9766 %}
9767
9768 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9769 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9770 ins_cost(200);
9771 expand %{
9772 cmovD_regU(cop, cr, dst, src);
9773 %}
9774 %}
9775
9776 //----------Arithmetic Instructions--------------------------------------------
9777 //----------Addition Instructions----------------------------------------------
9778
9779 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9780 %{
9781 predicate(!UseAPX);
9782 match(Set dst (AddI dst src));
9783 effect(KILL cr);
9784 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9785 format %{ "addl $dst, $src\t# int" %}
9786 ins_encode %{
9787 __ addl($dst$$Register, $src$$Register);
9788 %}
9789 ins_pipe(ialu_reg_reg);
9790 %}
9791
9792 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9793 %{
9794 predicate(UseAPX);
9795 match(Set dst (AddI src1 src2));
9796 effect(KILL cr);
9797 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9798
9799 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9800 ins_encode %{
9801 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9802 %}
9803 ins_pipe(ialu_reg_reg);
9804 %}
9805
9806 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9807 %{
9808 predicate(!UseAPX);
9809 match(Set dst (AddI dst src));
9810 effect(KILL cr);
9811 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9812
9813 format %{ "addl $dst, $src\t# int" %}
9814 ins_encode %{
9815 __ addl($dst$$Register, $src$$constant);
9816 %}
9817 ins_pipe( ialu_reg );
9818 %}
9819
9820 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9821 %{
9822 predicate(UseAPX);
9823 match(Set dst (AddI src1 src2));
9824 effect(KILL cr);
9825 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9826
9827 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9828 ins_encode %{
9829 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9830 %}
9831 ins_pipe( ialu_reg );
9832 %}
9833
9834 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9835 %{
9836 predicate(UseAPX);
9837 match(Set dst (AddI (LoadI src1) src2));
9838 effect(KILL cr);
9839 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9840
9841 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9842 ins_encode %{
9843 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9844 %}
9845 ins_pipe( ialu_reg );
9846 %}
9847
9848 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9849 %{
9850 predicate(!UseAPX);
9851 match(Set dst (AddI dst (LoadI src)));
9852 effect(KILL cr);
9853 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9854
9855 ins_cost(150); // XXX
9856 format %{ "addl $dst, $src\t# int" %}
9857 ins_encode %{
9858 __ addl($dst$$Register, $src$$Address);
9859 %}
9860 ins_pipe(ialu_reg_mem);
9861 %}
9862
9863 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9864 %{
9865 predicate(UseAPX);
9866 match(Set dst (AddI src1 (LoadI src2)));
9867 effect(KILL cr);
9868 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9869
9870 ins_cost(150);
9871 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9872 ins_encode %{
9873 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9874 %}
9875 ins_pipe(ialu_reg_mem);
9876 %}
9877
9878 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9879 %{
9880 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9881 effect(KILL cr);
9882 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9883
9884 ins_cost(150); // XXX
9885 format %{ "addl $dst, $src\t# int" %}
9886 ins_encode %{
9887 __ addl($dst$$Address, $src$$Register);
9888 %}
9889 ins_pipe(ialu_mem_reg);
9890 %}
9891
9892 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9893 %{
9894 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9895 effect(KILL cr);
9896 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9897
9898
9899 ins_cost(125); // XXX
9900 format %{ "addl $dst, $src\t# int" %}
9901 ins_encode %{
9902 __ addl($dst$$Address, $src$$constant);
9903 %}
9904 ins_pipe(ialu_mem_imm);
9905 %}
9906
9907 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
9908 %{
9909 predicate(!UseAPX && UseIncDec);
9910 match(Set dst (AddI dst src));
9911 effect(KILL cr);
9912
9913 format %{ "incl $dst\t# int" %}
9914 ins_encode %{
9915 __ incrementl($dst$$Register);
9916 %}
9917 ins_pipe(ialu_reg);
9918 %}
9919
9920 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
9921 %{
9922 predicate(UseAPX && UseIncDec);
9923 match(Set dst (AddI src val));
9924 effect(KILL cr);
9925
9926 format %{ "eincl $dst, $src\t# int ndd" %}
9927 ins_encode %{
9928 __ eincl($dst$$Register, $src$$Register, false);
9929 %}
9930 ins_pipe(ialu_reg);
9931 %}
9932
9933 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
9934 %{
9935 predicate(UseAPX && UseIncDec);
9936 match(Set dst (AddI (LoadI src) val));
9937 effect(KILL cr);
9938
9939 format %{ "eincl $dst, $src\t# int ndd" %}
9940 ins_encode %{
9941 __ eincl($dst$$Register, $src$$Address, false);
9942 %}
9943 ins_pipe(ialu_reg);
9944 %}
9945
9946 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
9947 %{
9948 predicate(UseIncDec);
9949 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9950 effect(KILL cr);
9951
9952 ins_cost(125); // XXX
9953 format %{ "incl $dst\t# int" %}
9954 ins_encode %{
9955 __ incrementl($dst$$Address);
9956 %}
9957 ins_pipe(ialu_mem_imm);
9958 %}
9959
9960 // XXX why does that use AddI
9961 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
9962 %{
9963 predicate(!UseAPX && UseIncDec);
9964 match(Set dst (AddI dst src));
9965 effect(KILL cr);
9966
9967 format %{ "decl $dst\t# int" %}
9968 ins_encode %{
9969 __ decrementl($dst$$Register);
9970 %}
9971 ins_pipe(ialu_reg);
9972 %}
9973
9974 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
9975 %{
9976 predicate(UseAPX && UseIncDec);
9977 match(Set dst (AddI src val));
9978 effect(KILL cr);
9979
9980 format %{ "edecl $dst, $src\t# int ndd" %}
9981 ins_encode %{
9982 __ edecl($dst$$Register, $src$$Register, false);
9983 %}
9984 ins_pipe(ialu_reg);
9985 %}
9986
9987 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
9988 %{
9989 predicate(UseAPX && UseIncDec);
9990 match(Set dst (AddI (LoadI src) val));
9991 effect(KILL cr);
9992
9993 format %{ "edecl $dst, $src\t# int ndd" %}
9994 ins_encode %{
9995 __ edecl($dst$$Register, $src$$Address, false);
9996 %}
9997 ins_pipe(ialu_reg);
9998 %}
9999
10000 // XXX why does that use AddI
10001 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10002 %{
10003 predicate(UseIncDec);
10004 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10005 effect(KILL cr);
10006
10007 ins_cost(125); // XXX
10008 format %{ "decl $dst\t# int" %}
10009 ins_encode %{
10010 __ decrementl($dst$$Address);
10011 %}
10012 ins_pipe(ialu_mem_imm);
10013 %}
10014
10015 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10016 %{
10017 predicate(VM_Version::supports_fast_2op_lea());
10018 match(Set dst (AddI (LShiftI index scale) disp));
10019
10020 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10021 ins_encode %{
10022 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10023 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10024 %}
10025 ins_pipe(ialu_reg_reg);
10026 %}
10027
10028 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10029 %{
10030 predicate(VM_Version::supports_fast_3op_lea());
10031 match(Set dst (AddI (AddI base index) disp));
10032
10033 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10034 ins_encode %{
10035 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10036 %}
10037 ins_pipe(ialu_reg_reg);
10038 %}
10039
10040 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10041 %{
10042 predicate(VM_Version::supports_fast_2op_lea());
10043 match(Set dst (AddI base (LShiftI index scale)));
10044
10045 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10046 ins_encode %{
10047 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10048 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10049 %}
10050 ins_pipe(ialu_reg_reg);
10051 %}
10052
10053 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10054 %{
10055 predicate(VM_Version::supports_fast_3op_lea());
10056 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10057
10058 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10059 ins_encode %{
10060 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10061 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10062 %}
10063 ins_pipe(ialu_reg_reg);
10064 %}
10065
10066 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10067 %{
10068 predicate(!UseAPX);
10069 match(Set dst (AddL dst src));
10070 effect(KILL cr);
10071 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10072
10073 format %{ "addq $dst, $src\t# long" %}
10074 ins_encode %{
10075 __ addq($dst$$Register, $src$$Register);
10076 %}
10077 ins_pipe(ialu_reg_reg);
10078 %}
10079
10080 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10081 %{
10082 predicate(UseAPX);
10083 match(Set dst (AddL src1 src2));
10084 effect(KILL cr);
10085 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10086
10087 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10088 ins_encode %{
10089 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10090 %}
10091 ins_pipe(ialu_reg_reg);
10092 %}
10093
10094 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10095 %{
10096 predicate(!UseAPX);
10097 match(Set dst (AddL dst src));
10098 effect(KILL cr);
10099 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10100
10101 format %{ "addq $dst, $src\t# long" %}
10102 ins_encode %{
10103 __ addq($dst$$Register, $src$$constant);
10104 %}
10105 ins_pipe( ialu_reg );
10106 %}
10107
10108 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10109 %{
10110 predicate(UseAPX);
10111 match(Set dst (AddL src1 src2));
10112 effect(KILL cr);
10113 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10114
10115 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10116 ins_encode %{
10117 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10118 %}
10119 ins_pipe( ialu_reg );
10120 %}
10121
10122 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10123 %{
10124 predicate(UseAPX);
10125 match(Set dst (AddL (LoadL src1) src2));
10126 effect(KILL cr);
10127 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10128
10129 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10130 ins_encode %{
10131 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10132 %}
10133 ins_pipe( ialu_reg );
10134 %}
10135
10136 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10137 %{
10138 predicate(!UseAPX);
10139 match(Set dst (AddL dst (LoadL src)));
10140 effect(KILL cr);
10141 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10142
10143 ins_cost(150); // XXX
10144 format %{ "addq $dst, $src\t# long" %}
10145 ins_encode %{
10146 __ addq($dst$$Register, $src$$Address);
10147 %}
10148 ins_pipe(ialu_reg_mem);
10149 %}
10150
10151 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10152 %{
10153 predicate(UseAPX);
10154 match(Set dst (AddL src1 (LoadL src2)));
10155 effect(KILL cr);
10156 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10157
10158 ins_cost(150);
10159 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10160 ins_encode %{
10161 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10162 %}
10163 ins_pipe(ialu_reg_mem);
10164 %}
10165
10166 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10167 %{
10168 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10169 effect(KILL cr);
10170 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10171
10172 ins_cost(150); // XXX
10173 format %{ "addq $dst, $src\t# long" %}
10174 ins_encode %{
10175 __ addq($dst$$Address, $src$$Register);
10176 %}
10177 ins_pipe(ialu_mem_reg);
10178 %}
10179
10180 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10181 %{
10182 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10183 effect(KILL cr);
10184 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10185
10186 ins_cost(125); // XXX
10187 format %{ "addq $dst, $src\t# long" %}
10188 ins_encode %{
10189 __ addq($dst$$Address, $src$$constant);
10190 %}
10191 ins_pipe(ialu_mem_imm);
10192 %}
10193
10194 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10195 %{
10196 predicate(!UseAPX && UseIncDec);
10197 match(Set dst (AddL dst src));
10198 effect(KILL cr);
10199
10200 format %{ "incq $dst\t# long" %}
10201 ins_encode %{
10202 __ incrementq($dst$$Register);
10203 %}
10204 ins_pipe(ialu_reg);
10205 %}
10206
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10208 %{
10209 predicate(UseAPX && UseIncDec);
10210 match(Set dst (AddL src val));
10211 effect(KILL cr);
10212
10213 format %{ "eincq $dst, $src\t# long ndd" %}
10214 ins_encode %{
10215 __ eincq($dst$$Register, $src$$Register, false);
10216 %}
10217 ins_pipe(ialu_reg);
10218 %}
10219
10220 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10221 %{
10222 predicate(UseAPX && UseIncDec);
10223 match(Set dst (AddL (LoadL src) val));
10224 effect(KILL cr);
10225
10226 format %{ "eincq $dst, $src\t# long ndd" %}
10227 ins_encode %{
10228 __ eincq($dst$$Register, $src$$Address, false);
10229 %}
10230 ins_pipe(ialu_reg);
10231 %}
10232
10233 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10234 %{
10235 predicate(UseIncDec);
10236 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10237 effect(KILL cr);
10238
10239 ins_cost(125); // XXX
10240 format %{ "incq $dst\t# long" %}
10241 ins_encode %{
10242 __ incrementq($dst$$Address);
10243 %}
10244 ins_pipe(ialu_mem_imm);
10245 %}
10246
10247 // XXX why does that use AddL
10248 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10249 %{
10250 predicate(!UseAPX && UseIncDec);
10251 match(Set dst (AddL dst src));
10252 effect(KILL cr);
10253
10254 format %{ "decq $dst\t# long" %}
10255 ins_encode %{
10256 __ decrementq($dst$$Register);
10257 %}
10258 ins_pipe(ialu_reg);
10259 %}
10260
10261 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10262 %{
10263 predicate(UseAPX && UseIncDec);
10264 match(Set dst (AddL src val));
10265 effect(KILL cr);
10266
10267 format %{ "edecq $dst, $src\t# long ndd" %}
10268 ins_encode %{
10269 __ edecq($dst$$Register, $src$$Register, false);
10270 %}
10271 ins_pipe(ialu_reg);
10272 %}
10273
10274 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10275 %{
10276 predicate(UseAPX && UseIncDec);
10277 match(Set dst (AddL (LoadL src) val));
10278 effect(KILL cr);
10279
10280 format %{ "edecq $dst, $src\t# long ndd" %}
10281 ins_encode %{
10282 __ edecq($dst$$Register, $src$$Address, false);
10283 %}
10284 ins_pipe(ialu_reg);
10285 %}
10286
10287 // XXX why does that use AddL
10288 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10289 %{
10290 predicate(UseIncDec);
10291 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10292 effect(KILL cr);
10293
10294 ins_cost(125); // XXX
10295 format %{ "decq $dst\t# long" %}
10296 ins_encode %{
10297 __ decrementq($dst$$Address);
10298 %}
10299 ins_pipe(ialu_mem_imm);
10300 %}
10301
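// Fused lea forms. When the CPU reports fast 2- or 3-operand lea, add/shift
// trees are folded into a single leaq, roughly dst = base + (index << scale)
// + disp. lea does not touch the flags, which is why these patterns carry no
// KILL cr effect.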
10302 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10303 %{
10304 predicate(VM_Version::supports_fast_2op_lea());
10305 match(Set dst (AddL (LShiftL index scale) disp));
10306
10307 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10308 ins_encode %{
10309 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10310 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10311 %}
10312 ins_pipe(ialu_reg_reg);
10313 %}
10314
10315 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10316 %{
10317 predicate(VM_Version::supports_fast_3op_lea());
10318 match(Set dst (AddL (AddL base index) disp));
10319
10320 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10321 ins_encode %{
10322 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10323 %}
10324 ins_pipe(ialu_reg_reg);
10325 %}
10326
10327 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10328 %{
10329 predicate(VM_Version::supports_fast_2op_lea());
10330 match(Set dst (AddL base (LShiftL index scale)));
10331
10332 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10333 ins_encode %{
10334 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10335 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10336 %}
10337 ins_pipe(ialu_reg_reg);
10338 %}
10339
10340 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10341 %{
10342 predicate(VM_Version::supports_fast_3op_lea());
10343 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10344
10345 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10346 ins_encode %{
10347 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10348 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10349 %}
10350 ins_pipe(ialu_reg_reg);
10351 %}
10352
10353 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10354 %{
10355 match(Set dst (AddP dst src));
10356 effect(KILL cr);
10357 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10358
10359 format %{ "addq $dst, $src\t# ptr" %}
10360 ins_encode %{
10361 __ addq($dst$$Register, $src$$Register);
10362 %}
10363 ins_pipe(ialu_reg_reg);
10364 %}
10365
10366 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10367 %{
10368 match(Set dst (AddP dst src));
10369 effect(KILL cr);
10370 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10371
10372 format %{ "addq $dst, $src\t# ptr" %}
10373 ins_encode %{
10374 __ addq($dst$$Register, $src$$constant);
10375 %}
10376 ins_pipe( ialu_reg );
10377 %}
10378
10379 // XXX addP mem ops ????
10380
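// The cast nodes below (checkCastPP, castPP, castII, ...) exist only to
// carry improved type information for the optimizer and normally emit no
// code (size(0)). When VerifyConstraintCasts is nonzero, the castII/castLL
// forms instead expand into code verifying that the value lies within the
// node's type range.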
10381 instruct checkCastPP(rRegP dst)
10382 %{
10383 match(Set dst (CheckCastPP dst));
10384
10385 size(0);
10386 format %{ "# checkcastPP of $dst" %}
10387 ins_encode(/* empty encoding */);
10388 ins_pipe(empty);
10389 %}
10390
10391 instruct castPP(rRegP dst)
10392 %{
10393 match(Set dst (CastPP dst));
10394
10395 size(0);
10396 format %{ "# castPP of $dst" %}
10397 ins_encode(/* empty encoding */);
10398 ins_pipe(empty);
10399 %}
10400
10401 instruct castII(rRegI dst)
10402 %{
10403 predicate(VerifyConstraintCasts == 0);
10404 match(Set dst (CastII dst));
10405
10406 size(0);
10407 format %{ "# castII of $dst" %}
10408 ins_encode(/* empty encoding */);
10409 ins_cost(0);
10410 ins_pipe(empty);
10411 %}
10412
10413 instruct castII_checked(rRegI dst, rFlagsReg cr)
10414 %{
10415 predicate(VerifyConstraintCasts > 0);
10416 match(Set dst (CastII dst));
10417
10418 effect(KILL cr);
10419 format %{ "# cast_checked_II $dst" %}
10420 ins_encode %{
10421 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10422 %}
10423 ins_pipe(pipe_slow);
10424 %}
10425
10426 instruct castLL(rRegL dst)
10427 %{
10428 predicate(VerifyConstraintCasts == 0);
10429 match(Set dst (CastLL dst));
10430
10431 size(0);
10432 format %{ "# castLL of $dst" %}
10433 ins_encode(/* empty encoding */);
10434 ins_cost(0);
10435 ins_pipe(empty);
10436 %}
10437
10438 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10439 %{
10440 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10441 match(Set dst (CastLL dst));
10442
10443 effect(KILL cr);
10444 format %{ "# cast_checked_LL $dst" %}
10445 ins_encode %{
10446 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10447 %}
10448 ins_pipe(pipe_slow);
10449 %}
10450
10451 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10452 %{
10453 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10454 match(Set dst (CastLL dst));
10455
10456 effect(KILL cr, TEMP tmp);
10457 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10458 ins_encode %{
10459 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10460 %}
10461 ins_pipe(pipe_slow);
10462 %}
10463
10464 instruct castFF(regF dst)
10465 %{
10466 match(Set dst (CastFF dst));
10467
10468 size(0);
10469 format %{ "# castFF of $dst" %}
10470 ins_encode(/* empty encoding */);
10471 ins_cost(0);
10472 ins_pipe(empty);
10473 %}
10474
10475 instruct castHH(regF dst)
10476 %{
10477 match(Set dst (CastHH dst));
10478
10479 size(0);
10480 format %{ "# castHH of $dst" %}
10481 ins_encode(/* empty encoding */);
10482 ins_cost(0);
10483 ins_pipe(empty);
10484 %}
10485
10486 instruct castDD(regD dst)
10487 %{
10488 match(Set dst (CastDD dst));
10489
10490 size(0);
10491 format %{ "# castDD of $dst" %}
10492 ins_encode(/* empty encoding */);
10493 ins_cost(0);
10494 ins_pipe(empty);
10495 %}
10496
10497 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
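// All CAS forms below emit "lock cmpxchg", which atomically performs,
// in effect:
//   if (*mem == rax) { *mem = newval; ZF = 1; }
//   else             { rax = *mem;    ZF = 0; }
// The strong and weak Java CAS variants both map onto this same encoding;
// the success flag is materialized from ZF via setcc.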
10498 instruct compareAndSwapP(rRegI res,
10499 memory mem_ptr,
10500 rax_RegP oldval, rRegP newval,
10501 rFlagsReg cr)
10502 %{
10503 predicate(n->as_LoadStore()->barrier_data() == 0);
10504 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10505 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10506 effect(KILL cr, KILL oldval);
10507
10508 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10509 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10510 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10511 ins_encode %{
10512 __ lock();
10513 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10514 __ setcc(Assembler::equal, $res$$Register);
10515 %}
10516 ins_pipe( pipe_cmpxchg );
10517 %}
10518
10519 instruct compareAndSwapL(rRegI res,
10520 memory mem_ptr,
10521 rax_RegL oldval, rRegL newval,
10522 rFlagsReg cr)
10523 %{
10524 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10525 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10526 effect(KILL cr, KILL oldval);
10527
10528 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10529 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10530 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10531 ins_encode %{
10532 __ lock();
10533 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10534 __ setcc(Assembler::equal, $res$$Register);
10535 %}
10536 ins_pipe( pipe_cmpxchg );
10537 %}
10538
10539 instruct compareAndSwapI(rRegI res,
10540 memory mem_ptr,
10541 rax_RegI oldval, rRegI newval,
10542 rFlagsReg cr)
10543 %{
10544 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10545 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10546 effect(KILL cr, KILL oldval);
10547
10548 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10549 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10550 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10551 ins_encode %{
10552 __ lock();
10553 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10554 __ setcc(Assembler::equal, $res$$Register);
10555 %}
10556 ins_pipe( pipe_cmpxchg );
10557 %}
10558
10559 instruct compareAndSwapB(rRegI res,
10560 memory mem_ptr,
10561 rax_RegI oldval, rRegI newval,
10562 rFlagsReg cr)
10563 %{
10564 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10565 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10566 effect(KILL cr, KILL oldval);
10567
10568 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10569 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10570 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10571 ins_encode %{
10572 __ lock();
10573 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10574 __ setcc(Assembler::equal, $res$$Register);
10575 %}
10576 ins_pipe( pipe_cmpxchg );
10577 %}
10578
10579 instruct compareAndSwapS(rRegI res,
10580 memory mem_ptr,
10581 rax_RegI oldval, rRegI newval,
10582 rFlagsReg cr)
10583 %{
10584 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10585 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10586 effect(KILL cr, KILL oldval);
10587
10588 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10589 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10590 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10591 ins_encode %{
10592 __ lock();
10593 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10594 __ setcc(Assembler::equal, $res$$Register);
10595 %}
10596 ins_pipe( pipe_cmpxchg );
10597 %}
10598
10599 instruct compareAndSwapN(rRegI res,
10600 memory mem_ptr,
10601 rax_RegN oldval, rRegN newval,
10602 rFlagsReg cr) %{
10603 predicate(n->as_LoadStore()->barrier_data() == 0);
10604 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10605 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10606 effect(KILL cr, KILL oldval);
10607
10608 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10609 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10610 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10611 ins_encode %{
10612 __ lock();
10613 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10614 __ setcc(Assembler::equal, $res$$Register);
10615 %}
10616 ins_pipe( pipe_cmpxchg );
10617 %}
10618
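// CompareAndExchange differs from CompareAndSwap only in its result: it
// returns the value witnessed in memory (left in rax by cmpxchg) rather
// than a success flag, so no setcc is needed.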
10619 instruct compareAndExchangeB(
10620 memory mem_ptr,
10621 rax_RegI oldval, rRegI newval,
10622 rFlagsReg cr)
10623 %{
10624 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10625 effect(KILL cr);
10626
10627 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10628 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10629 ins_encode %{
10630 __ lock();
10631 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10632 %}
10633 ins_pipe( pipe_cmpxchg );
10634 %}
10635
10636 instruct compareAndExchangeS(
10637 memory mem_ptr,
10638 rax_RegI oldval, rRegI newval,
10639 rFlagsReg cr)
10640 %{
10641 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10642 effect(KILL cr);
10643
10644 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10645 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10646 ins_encode %{
10647 __ lock();
10648 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10649 %}
10650 ins_pipe( pipe_cmpxchg );
10651 %}
10652
10653 instruct compareAndExchangeI(
10654 memory mem_ptr,
10655 rax_RegI oldval, rRegI newval,
10656 rFlagsReg cr)
10657 %{
10658 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10659 effect(KILL cr);
10660
10661 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10662 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10663 ins_encode %{
10664 __ lock();
10665 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10666 %}
10667 ins_pipe( pipe_cmpxchg );
10668 %}
10669
10670 instruct compareAndExchangeL(
10671 memory mem_ptr,
10672 rax_RegL oldval, rRegL newval,
10673 rFlagsReg cr)
10674 %{
10675 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10676 effect(KILL cr);
10677
10678 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10679 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10680 ins_encode %{
10681 __ lock();
10682 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10683 %}
10684 ins_pipe( pipe_cmpxchg );
10685 %}
10686
10687 instruct compareAndExchangeN(
10688 memory mem_ptr,
10689 rax_RegN oldval, rRegN newval,
10690 rFlagsReg cr) %{
10691 predicate(n->as_LoadStore()->barrier_data() == 0);
10692 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10693 effect(KILL cr);
10694
10695 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10696 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10697 ins_encode %{
10698 __ lock();
10699 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10700 %}
10701 ins_pipe( pipe_cmpxchg );
10702 %}
10703
10704 instruct compareAndExchangeP(
10705 memory mem_ptr,
10706 rax_RegP oldval, rRegP newval,
10707 rFlagsReg cr)
10708 %{
10709 predicate(n->as_LoadStore()->barrier_data() == 0);
10710 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10711 effect(KILL cr);
10712
10713 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10714 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10715 ins_encode %{
10716 __ lock();
10717 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10718 %}
10719 ins_pipe( pipe_cmpxchg );
10720 %}
10721
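// GetAndAdd lowering: when the fetched result is unused, a plain "lock add"
// is cheaper; otherwise "lock xadd" is used, which atomically adds and
// leaves the previous memory value in the register operand.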
10722 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10723 predicate(n->as_LoadStore()->result_not_used());
10724 match(Set dummy (GetAndAddB mem add));
10725 effect(KILL cr);
10726 format %{ "addb_lock $mem, $add" %}
10727 ins_encode %{
10728 __ lock();
10729 __ addb($mem$$Address, $add$$Register);
10730 %}
10731 ins_pipe(pipe_cmpxchg);
10732 %}
10733
10734 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10735 predicate(n->as_LoadStore()->result_not_used());
10736 match(Set dummy (GetAndAddB mem add));
10737 effect(KILL cr);
10738 format %{ "addb_lock $mem, $add" %}
10739 ins_encode %{
10740 __ lock();
10741 __ addb($mem$$Address, $add$$constant);
10742 %}
10743 ins_pipe(pipe_cmpxchg);
10744 %}
10745
10746 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10747 predicate(!n->as_LoadStore()->result_not_used());
10748 match(Set newval (GetAndAddB mem newval));
10749 effect(KILL cr);
10750 format %{ "xaddb_lock $mem, $newval" %}
10751 ins_encode %{
10752 __ lock();
10753 __ xaddb($mem$$Address, $newval$$Register);
10754 %}
10755 ins_pipe(pipe_cmpxchg);
10756 %}
10757
10758 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10759 predicate(n->as_LoadStore()->result_not_used());
10760 match(Set dummy (GetAndAddS mem add));
10761 effect(KILL cr);
10762 format %{ "addw_lock $mem, $add" %}
10763 ins_encode %{
10764 __ lock();
10765 __ addw($mem$$Address, $add$$Register);
10766 %}
10767 ins_pipe(pipe_cmpxchg);
10768 %}
10769
10770 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10771 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10772 match(Set dummy (GetAndAddS mem add));
10773 effect(KILL cr);
10774 format %{ "addw_lock $mem, $add" %}
10775 ins_encode %{
10776 __ lock();
10777 __ addw($mem$$Address, $add$$constant);
10778 %}
10779 ins_pipe(pipe_cmpxchg);
10780 %}
10781
10782 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10783 predicate(!n->as_LoadStore()->result_not_used());
10784 match(Set newval (GetAndAddS mem newval));
10785 effect(KILL cr);
10786 format %{ "xaddw_lock $mem, $newval" %}
10787 ins_encode %{
10788 __ lock();
10789 __ xaddw($mem$$Address, $newval$$Register);
10790 %}
10791 ins_pipe(pipe_cmpxchg);
10792 %}
10793
10794 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10795 predicate(n->as_LoadStore()->result_not_used());
10796 match(Set dummy (GetAndAddI mem add));
10797 effect(KILL cr);
10798 format %{ "addl_lock $mem, $add" %}
10799 ins_encode %{
10800 __ lock();
10801 __ addl($mem$$Address, $add$$Register);
10802 %}
10803 ins_pipe(pipe_cmpxchg);
10804 %}
10805
10806 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10807 predicate(n->as_LoadStore()->result_not_used());
10808 match(Set dummy (GetAndAddI mem add));
10809 effect(KILL cr);
10810 format %{ "addl_lock $mem, $add" %}
10811 ins_encode %{
10812 __ lock();
10813 __ addl($mem$$Address, $add$$constant);
10814 %}
10815 ins_pipe(pipe_cmpxchg);
10816 %}
10817
10818 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10819 predicate(!n->as_LoadStore()->result_not_used());
10820 match(Set newval (GetAndAddI mem newval));
10821 effect(KILL cr);
10822 format %{ "xaddl_lock $mem, $newval" %}
10823 ins_encode %{
10824 __ lock();
10825 __ xaddl($mem$$Address, $newval$$Register);
10826 %}
10827 ins_pipe(pipe_cmpxchg);
10828 %}
10829
10830 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10831 predicate(n->as_LoadStore()->result_not_used());
10832 match(Set dummy (GetAndAddL mem add));
10833 effect(KILL cr);
10834 format %{ "addq_lock $mem, $add" %}
10835 ins_encode %{
10836 __ lock();
10837 __ addq($mem$$Address, $add$$Register);
10838 %}
10839 ins_pipe(pipe_cmpxchg);
10840 %}
10841
10842 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10843 predicate(n->as_LoadStore()->result_not_used());
10844 match(Set dummy (GetAndAddL mem add));
10845 effect(KILL cr);
10846 format %{ "addq_lock $mem, $add" %}
10847 ins_encode %{
10848 __ lock();
10849 __ addq($mem$$Address, $add$$constant);
10850 %}
10851 ins_pipe(pipe_cmpxchg);
10852 %}
10853
10854 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10855 predicate(!n->as_LoadStore()->result_not_used());
10856 match(Set newval (GetAndAddL mem newval));
10857 effect(KILL cr);
10858 format %{ "xaddq_lock $mem, $newval" %}
10859 ins_encode %{
10860 __ lock();
10861 __ xaddq($mem$$Address, $newval$$Register);
10862 %}
10863 ins_pipe(pipe_cmpxchg);
10864 %}
10865
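// GetAndSet lowering: xchg with a memory operand is implicitly locked on
// x86, so no lock prefix needs to be emitted for these.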
10866 instruct xchgB( memory mem, rRegI newval) %{
10867 match(Set newval (GetAndSetB mem newval));
10868 format %{ "XCHGB $newval,[$mem]" %}
10869 ins_encode %{
10870 __ xchgb($newval$$Register, $mem$$Address);
10871 %}
10872 ins_pipe( pipe_cmpxchg );
10873 %}
10874
10875 instruct xchgS( memory mem, rRegI newval) %{
10876 match(Set newval (GetAndSetS mem newval));
10877 format %{ "XCHGW $newval,[$mem]" %}
10878 ins_encode %{
10879 __ xchgw($newval$$Register, $mem$$Address);
10880 %}
10881 ins_pipe( pipe_cmpxchg );
10882 %}
10883
10884 instruct xchgI( memory mem, rRegI newval) %{
10885 match(Set newval (GetAndSetI mem newval));
10886 format %{ "XCHGL $newval,[$mem]" %}
10887 ins_encode %{
10888 __ xchgl($newval$$Register, $mem$$Address);
10889 %}
10890 ins_pipe( pipe_cmpxchg );
10891 %}
10892
10893 instruct xchgL( memory mem, rRegL newval) %{
10894 match(Set newval (GetAndSetL mem newval));
10895 format %{ "XCHGL $newval,[$mem]" %}
10896 ins_encode %{
10897 __ xchgq($newval$$Register, $mem$$Address);
10898 %}
10899 ins_pipe( pipe_cmpxchg );
10900 %}
10901
10902 instruct xchgP( memory mem, rRegP newval) %{
10903 match(Set newval (GetAndSetP mem newval));
10904 predicate(n->as_LoadStore()->barrier_data() == 0);
10905 format %{ "XCHGQ $newval,[$mem]" %}
10906 ins_encode %{
10907 __ xchgq($newval$$Register, $mem$$Address);
10908 %}
10909 ins_pipe( pipe_cmpxchg );
10910 %}
10911
10912 instruct xchgN( memory mem, rRegN newval) %{
10913 predicate(n->as_LoadStore()->barrier_data() == 0);
10914 match(Set newval (GetAndSetN mem newval));
10915 format %{ "XCHGL $newval,$mem]" %}
10916 ins_encode %{
10917 __ xchgl($newval$$Register, $mem$$Address);
10918 %}
10919 ins_pipe( pipe_cmpxchg );
10920 %}
10921
10922 //----------Abs Instructions-------------------------------------------
10923
10924 // Integer Absolute Instructions
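// The sequence below computes abs(src) branchlessly; roughly, in C:
//   dst = 0 - src;            // xor + sub: negate, setting the flags
//   if (0 < src) dst = src;   // cmov on "less": keep the positive input
// Note that min_int maps to itself, matching Java's Math.abs semantics.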
10925 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10926 %{
10927 match(Set dst (AbsI src));
10928 effect(TEMP dst, KILL cr);
10929 format %{ "xorl $dst, $dst\t# abs int\n\t"
10930 "subl $dst, $src\n\t"
10931 "cmovll $dst, $src" %}
10932 ins_encode %{
10933 __ xorl($dst$$Register, $dst$$Register);
10934 __ subl($dst$$Register, $src$$Register);
10935 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10936 %}
10937
10938 ins_pipe(ialu_reg_reg);
10939 %}
10940
10941 // Long Absolute Instructions
10942 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10943 %{
10944 match(Set dst (AbsL src));
10945 effect(TEMP dst, KILL cr);
10946 format %{ "xorl $dst, $dst\t# abs long\n\t"
10947 "subq $dst, $src\n\t"
10948 "cmovlq $dst, $src" %}
10949 ins_encode %{
10950 __ xorl($dst$$Register, $dst$$Register);
10951 __ subq($dst$$Register, $src$$Register);
10952 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10953 %}
10954
10955 ins_pipe(ialu_reg_reg);
10956 %}
10957
10958 //----------Subtraction Instructions-------------------------------------------
10959
10960 // Integer Subtraction Instructions
10961 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10962 %{
10963 predicate(!UseAPX);
10964 match(Set dst (SubI dst src));
10965 effect(KILL cr);
10966 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10967
10968 format %{ "subl $dst, $src\t# int" %}
10969 ins_encode %{
10970 __ subl($dst$$Register, $src$$Register);
10971 %}
10972 ins_pipe(ialu_reg_reg);
10973 %}
10974
10975 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10976 %{
10977 predicate(UseAPX);
10978 match(Set dst (SubI src1 src2));
10979 effect(KILL cr);
10980 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10981
10982 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
10983 ins_encode %{
10984 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10985 %}
10986 ins_pipe(ialu_reg_reg);
10987 %}
10988
10989 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10990 %{
10991 predicate(UseAPX);
10992 match(Set dst (SubI src1 src2));
10993 effect(KILL cr);
10994 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10995
10996 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
10997 ins_encode %{
10998 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
10999 %}
11000 ins_pipe(ialu_reg_reg);
11001 %}
11002
11003 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11004 %{
11005 predicate(UseAPX);
11006 match(Set dst (SubI (LoadI src1) src2));
11007 effect(KILL cr);
11008 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11009
11010 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11011 ins_encode %{
11012 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11013 %}
11014 ins_pipe(ialu_reg_reg);
11015 %}
11016
11017 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11018 %{
11019 predicate(!UseAPX);
11020 match(Set dst (SubI dst (LoadI src)));
11021 effect(KILL cr);
11022 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11023
11024 ins_cost(150);
11025 format %{ "subl $dst, $src\t# int" %}
11026 ins_encode %{
11027 __ subl($dst$$Register, $src$$Address);
11028 %}
11029 ins_pipe(ialu_reg_mem);
11030 %}
11031
11032 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11033 %{
11034 predicate(UseAPX);
11035 match(Set dst (SubI src1 (LoadI src2)));
11036 effect(KILL cr);
11037 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11038
11039 ins_cost(150);
11040 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11041 ins_encode %{
11042 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11043 %}
11044 ins_pipe(ialu_reg_mem);
11045 %}
11046
11047 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11048 %{
11049 predicate(UseAPX);
11050 match(Set dst (SubI (LoadI src1) src2));
11051 effect(KILL cr);
11052 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11053
11054 ins_cost(150);
11055 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11056 ins_encode %{
11057 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11058 %}
11059 ins_pipe(ialu_reg_mem);
11060 %}
11061
11062 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11063 %{
11064 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11065 effect(KILL cr);
11066 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11067
11068 ins_cost(150);
11069 format %{ "subl $dst, $src\t# int" %}
11070 ins_encode %{
11071 __ subl($dst$$Address, $src$$Register);
11072 %}
11073 ins_pipe(ialu_mem_reg);
11074 %}
11075
11076 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11077 %{
11078 predicate(!UseAPX);
11079 match(Set dst (SubL dst src));
11080 effect(KILL cr);
11081 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11082
11083 format %{ "subq $dst, $src\t# long" %}
11084 ins_encode %{
11085 __ subq($dst$$Register, $src$$Register);
11086 %}
11087 ins_pipe(ialu_reg_reg);
11088 %}
11089
11090 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11091 %{
11092 predicate(UseAPX);
11093 match(Set dst (SubL src1 src2));
11094 effect(KILL cr);
11095 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11096
11097 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11098 ins_encode %{
11099 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11100 %}
11101 ins_pipe(ialu_reg_reg);
11102 %}
11103
11104 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11105 %{
11106 predicate(UseAPX);
11107 match(Set dst (SubL src1 src2));
11108 effect(KILL cr);
11109 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11110
11111 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11112 ins_encode %{
11113 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11114 %}
11115 ins_pipe(ialu_reg_reg);
11116 %}
11117
11118 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11119 %{
11120 predicate(UseAPX);
11121 match(Set dst (SubL (LoadL src1) src2));
11122 effect(KILL cr);
11123 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11124
11125 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11126 ins_encode %{
11127 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11128 %}
11129 ins_pipe(ialu_reg_reg);
11130 %}
11131
11132 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11133 %{
11134 predicate(!UseAPX);
11135 match(Set dst (SubL dst (LoadL src)));
11136 effect(KILL cr);
11137 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11138
11139 ins_cost(150);
11140 format %{ "subq $dst, $src\t# long" %}
11141 ins_encode %{
11142 __ subq($dst$$Register, $src$$Address);
11143 %}
11144 ins_pipe(ialu_reg_mem);
11145 %}
11146
11147 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11148 %{
11149 predicate(UseAPX);
11150 match(Set dst (SubL src1 (LoadL src2)));
11151 effect(KILL cr);
11152 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11153
11154 ins_cost(150);
11155 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11156 ins_encode %{
11157 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11158 %}
11159 ins_pipe(ialu_reg_mem);
11160 %}
11161
11162 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11163 %{
11164 predicate(UseAPX);
11165 match(Set dst (SubL (LoadL src1) src2));
11166 effect(KILL cr);
11167 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11168
11169 ins_cost(150);
11170 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11171 ins_encode %{
11172 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11173 %}
11174 ins_pipe(ialu_reg_mem);
11175 %}
11176
11177 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11178 %{
11179 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11180 effect(KILL cr);
11181 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11182
11183 ins_cost(150);
11184 format %{ "subq $dst, $src\t# long" %}
11185 ins_encode %{
11186 __ subq($dst$$Address, $src$$Register);
11187 %}
11188 ins_pipe(ialu_mem_reg);
11189 %}
11190
11191 // Subtract from a pointer
11192 // XXX hmpf???
11193 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11194 %{
11195 match(Set dst (AddP dst (SubI zero src)));
11196 effect(KILL cr);
11197
11198 format %{ "subq $dst, $src\t# ptr - int" %}
11199 ins_encode %{
11200 __ subq($dst$$Register, $src$$Register);
11201 %}
11202 ins_pipe(ialu_reg_reg);
11203 %}
11204
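// Integer negation: both (SubI zero dst) and the explicit NegI node lower
// to negl/negq (or to the APX eneg forms below).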
11205 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11206 %{
11207 predicate(!UseAPX);
11208 match(Set dst (SubI zero dst));
11209 effect(KILL cr);
11210 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11211
11212 format %{ "negl $dst\t# int" %}
11213 ins_encode %{
11214 __ negl($dst$$Register);
11215 %}
11216 ins_pipe(ialu_reg);
11217 %}
11218
11219 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11220 %{
11221 predicate(UseAPX);
11222 match(Set dst (SubI zero src));
11223 effect(KILL cr);
11224 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11225
11226 format %{ "enegl $dst, $src\t# int ndd" %}
11227 ins_encode %{
11228 __ enegl($dst$$Register, $src$$Register, false);
11229 %}
11230 ins_pipe(ialu_reg);
11231 %}
11232
11233 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11234 %{
11235 predicate(!UseAPX);
11236 match(Set dst (NegI dst));
11237 effect(KILL cr);
11238 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11239
11240 format %{ "negl $dst\t# int" %}
11241 ins_encode %{
11242 __ negl($dst$$Register);
11243 %}
11244 ins_pipe(ialu_reg);
11245 %}
11246
11247 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11248 %{
11249 predicate(UseAPX);
11250 match(Set dst (NegI src));
11251 effect(KILL cr);
11252 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11253
11254 format %{ "enegl $dst, $src\t# int ndd" %}
11255 ins_encode %{
11256 __ enegl($dst$$Register, $src$$Register, false);
11257 %}
11258 ins_pipe(ialu_reg);
11259 %}
11260
11261 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11262 %{
11263 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11264 effect(KILL cr);
11265 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11266
11267 format %{ "negl $dst\t# int" %}
11268 ins_encode %{
11269 __ negl($dst$$Address);
11270 %}
11271 ins_pipe(ialu_reg);
11272 %}
11273
11274 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11275 %{
11276 predicate(!UseAPX);
11277 match(Set dst (SubL zero dst));
11278 effect(KILL cr);
11279 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11280
11281 format %{ "negq $dst\t# long" %}
11282 ins_encode %{
11283 __ negq($dst$$Register);
11284 %}
11285 ins_pipe(ialu_reg);
11286 %}
11287
11288 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11289 %{
11290 predicate(UseAPX);
11291 match(Set dst (SubL zero src));
11292 effect(KILL cr);
11293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11294
11295 format %{ "enegq $dst, $src\t# long ndd" %}
11296 ins_encode %{
11297 __ enegq($dst$$Register, $src$$Register, false);
11298 %}
11299 ins_pipe(ialu_reg);
11300 %}
11301
11302 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11303 %{
11304 predicate(!UseAPX);
11305 match(Set dst (NegL dst));
11306 effect(KILL cr);
11307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11308
11309 format %{ "negq $dst\t# int" %}
11310 ins_encode %{
11311 __ negq($dst$$Register);
11312 %}
11313 ins_pipe(ialu_reg);
11314 %}
11315
11316 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11317 %{
11318 predicate(UseAPX);
11319 match(Set dst (NegL src));
11320 effect(KILL cr);
11321 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11322
11323 format %{ "enegq $dst, $src\t# long ndd" %}
11324 ins_encode %{
11325 __ enegq($dst$$Register, $src$$Register, false);
11326 %}
11327 ins_pipe(ialu_reg);
11328 %}
11329
11330 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11331 %{
11332 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11333 effect(KILL cr);
11334 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11335
11336 format %{ "negq $dst\t# long" %}
11337 ins_encode %{
11338 __ negq($dst$$Address);
11339 %}
11340 ins_pipe(ialu_reg);
11341 %}
11342
11343 //----------Multiplication/Division Instructions-------------------------------
11344 // Integer Multiplication Instructions
11345 // Multiply Register
11346
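// imul is used in its two-operand form (dst *= src) and its three-operand
// immediate form (dst = src * imm); both truncate to the operand width,
// which matches Java's wraparound multiply semantics.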
11347 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11348 %{
11349 predicate(!UseAPX);
11350 match(Set dst (MulI dst src));
11351 effect(KILL cr);
11352
11353 ins_cost(300);
11354 format %{ "imull $dst, $src\t# int" %}
11355 ins_encode %{
11356 __ imull($dst$$Register, $src$$Register);
11357 %}
11358 ins_pipe(ialu_reg_reg_alu0);
11359 %}
11360
11361 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11362 %{
11363 predicate(UseAPX);
11364 match(Set dst (MulI src1 src2));
11365 effect(KILL cr);
11366
11367 ins_cost(300);
11368 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11369 ins_encode %{
11370 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11371 %}
11372 ins_pipe(ialu_reg_reg_alu0);
11373 %}
11374
11375 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11376 %{
11377 match(Set dst (MulI src imm));
11378 effect(KILL cr);
11379
11380 ins_cost(300);
11381 format %{ "imull $dst, $src, $imm\t# int" %}
11382 ins_encode %{
11383 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11384 %}
11385 ins_pipe(ialu_reg_reg_alu0);
11386 %}
11387
11388 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11389 %{
11390 predicate(!UseAPX);
11391 match(Set dst (MulI dst (LoadI src)));
11392 effect(KILL cr);
11393
11394 ins_cost(350);
11395 format %{ "imull $dst, $src\t# int" %}
11396 ins_encode %{
11397 __ imull($dst$$Register, $src$$Address);
11398 %}
11399 ins_pipe(ialu_reg_mem_alu0);
11400 %}
11401
11402 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11403 %{
11404 predicate(UseAPX);
11405 match(Set dst (MulI src1 (LoadI src2)));
11406 effect(KILL cr);
11407
11408 ins_cost(350);
11409 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11410 ins_encode %{
11411 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11412 %}
11413 ins_pipe(ialu_reg_mem_alu0);
11414 %}
11415
11416 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11417 %{
11418 match(Set dst (MulI (LoadI src) imm));
11419 effect(KILL cr);
11420
11421 ins_cost(300);
11422 format %{ "imull $dst, $src, $imm\t# int" %}
11423 ins_encode %{
11424 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11425 %}
11426 ins_pipe(ialu_reg_mem_alu0);
11427 %}
11428
11429 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11430 %{
11431 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11432 effect(KILL cr, KILL src2);
11433
11434 expand %{ mulI_rReg(dst, src1, cr);
11435 mulI_rReg(src2, src3, cr);
11436 addI_rReg(dst, src2, cr); %}
11437 %}
11438
11439 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11440 %{
11441 predicate(!UseAPX);
11442 match(Set dst (MulL dst src));
11443 effect(KILL cr);
11444
11445 ins_cost(300);
11446 format %{ "imulq $dst, $src\t# long" %}
11447 ins_encode %{
11448 __ imulq($dst$$Register, $src$$Register);
11449 %}
11450 ins_pipe(ialu_reg_reg_alu0);
11451 %}
11452
11453 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11454 %{
11455 predicate(UseAPX);
11456 match(Set dst (MulL src1 src2));
11457 effect(KILL cr);
11458
11459 ins_cost(300);
11460 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11461 ins_encode %{
11462 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11463 %}
11464 ins_pipe(ialu_reg_reg_alu0);
11465 %}
11466
11467 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11468 %{
11469 match(Set dst (MulL src imm));
11470 effect(KILL cr);
11471
11472 ins_cost(300);
11473 format %{ "imulq $dst, $src, $imm\t# long" %}
11474 ins_encode %{
11475 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11476 %}
11477 ins_pipe(ialu_reg_reg_alu0);
11478 %}
11479
11480 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11481 %{
11482 predicate(!UseAPX);
11483 match(Set dst (MulL dst (LoadL src)));
11484 effect(KILL cr);
11485
11486 ins_cost(350);
11487 format %{ "imulq $dst, $src\t# long" %}
11488 ins_encode %{
11489 __ imulq($dst$$Register, $src$$Address);
11490 %}
11491 ins_pipe(ialu_reg_mem_alu0);
11492 %}
11493
11494 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11495 %{
11496 predicate(UseAPX);
11497 match(Set dst (MulL src1 (LoadL src2)));
11498 effect(KILL cr);
11499
11500 ins_cost(350);
11501 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11502 ins_encode %{
11503 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11504 %}
11505 ins_pipe(ialu_reg_mem_alu0);
11506 %}
11507
11508 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11509 %{
11510 match(Set dst (MulL (LoadL src) imm));
11511 effect(KILL cr);
11512
11513 ins_cost(300);
11514 format %{ "imulq $dst, $src, $imm\t# long" %}
11515 ins_encode %{
11516 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11517 %}
11518 ins_pipe(ialu_reg_mem_alu0);
11519 %}
11520
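// One-operand imulq/mulq multiply RAX by the operand and leave the 128-bit
// product in RDX:RAX; MulHiL/UMulHiL take the high half from RDX.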
11521 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11522 %{
11523 match(Set dst (MulHiL src rax));
11524 effect(USE_KILL rax, KILL cr);
11525
11526 ins_cost(300);
11527 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11528 ins_encode %{
11529 __ imulq($src$$Register);
11530 %}
11531 ins_pipe(ialu_reg_reg_alu0);
11532 %}
11533
11534 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11535 %{
11536 match(Set dst (UMulHiL src rax));
11537 effect(USE_KILL rax, KILL cr);
11538
11539 ins_cost(300);
11540 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11541 ins_encode %{
11542 __ mulq($src$$Register);
11543 %}
11544 ins_pipe(ialu_reg_reg_alu0);
11545 %}
11546
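// idiv raises #DE when the quotient overflows, which happens exactly for
// min_int / -1 (and min_long / -1 in the long variant). The cdql_enc /
// cdqq_enc sequences test for that pair up front and produce quotient =
// dividend, remainder = 0, as Java requires, before sign-extending and
// dividing. The DivMod and Mod instructions below reuse the same encodings.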
11547 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11548 rFlagsReg cr)
11549 %{
11550 match(Set rax (DivI rax div));
11551 effect(KILL rdx, KILL cr);
11552
11553 ins_cost(30*100+10*100); // XXX
11554 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11555 "jne,s normal\n\t"
11556 "xorl rdx, rdx\n\t"
11557 "cmpl $div, -1\n\t"
11558 "je,s done\n"
11559 "normal: cdql\n\t"
11560 "idivl $div\n"
11561 "done:" %}
11562 ins_encode(cdql_enc(div));
11563 ins_pipe(ialu_reg_reg_alu0);
11564 %}
11565
11566 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11567 rFlagsReg cr)
11568 %{
11569 match(Set rax (DivL rax div));
11570 effect(KILL rdx, KILL cr);
11571
11572 ins_cost(30*100+10*100); // XXX
11573 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11574 "cmpq rax, rdx\n\t"
11575 "jne,s normal\n\t"
11576 "xorl rdx, rdx\n\t"
11577 "cmpq $div, -1\n\t"
11578 "je,s done\n"
11579 "normal: cdqq\n\t"
11580 "idivq $div\n"
11581 "done:" %}
11582 ins_encode(cdqq_enc(div));
11583 ins_pipe(ialu_reg_reg_alu0);
11584 %}
11585
11586 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11587 %{
11588 match(Set rax (UDivI rax div));
11589 effect(KILL rdx, KILL cr);
11590
11591 ins_cost(300);
11592 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11593 ins_encode %{
11594 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11595 %}
11596 ins_pipe(ialu_reg_reg_alu0);
11597 %}
11598
11599 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11600 %{
11601 match(Set rax (UDivL rax div));
11602 effect(KILL rdx, KILL cr);
11603
11604 ins_cost(300);
11605 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11606 ins_encode %{
11607 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11608 %}
11609 ins_pipe(ialu_reg_reg_alu0);
11610 %}
11611
11612 // Integer DIVMOD with Register, both quotient and mod results
11613 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11614 rFlagsReg cr)
11615 %{
11616 match(DivModI rax div);
11617 effect(KILL cr);
11618
11619 ins_cost(30*100+10*100); // XXX
11620 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11621 "jne,s normal\n\t"
11622 "xorl rdx, rdx\n\t"
11623 "cmpl $div, -1\n\t"
11624 "je,s done\n"
11625 "normal: cdql\n\t"
11626 "idivl $div\n"
11627 "done:" %}
11628 ins_encode(cdql_enc(div));
11629 ins_pipe(pipe_slow);
11630 %}
11631
11632 // Long DIVMOD with Register, both quotient and mod results
11633 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11634 rFlagsReg cr)
11635 %{
11636 match(DivModL rax div);
11637 effect(KILL cr);
11638
11639 ins_cost(30*100+10*100); // XXX
11640 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11641 "cmpq rax, rdx\n\t"
11642 "jne,s normal\n\t"
11643 "xorl rdx, rdx\n\t"
11644 "cmpq $div, -1\n\t"
11645 "je,s done\n"
11646 "normal: cdqq\n\t"
11647 "idivq $div\n"
11648 "done:" %}
11649 ins_encode(cdqq_enc(div));
11650 ins_pipe(pipe_slow);
11651 %}
11652
11653 // Unsigned integer DIVMOD with Register, both quotient and mod results
11654 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11655 no_rax_rdx_RegI div, rFlagsReg cr)
11656 %{
11657 match(UDivModI rax div);
11658 effect(TEMP tmp, KILL cr);
11659
11660 ins_cost(300);
11661 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11662 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11663 %}
11664 ins_encode %{
11665 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11666 %}
11667 ins_pipe(pipe_slow);
11668 %}
11669
11670 // Unsigned long DIVMOD with Register, both quotient and mod results
11671 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11672 no_rax_rdx_RegL div, rFlagsReg cr)
11673 %{
11674 match(UDivModL rax div);
11675 effect(TEMP tmp, KILL cr);
11676
11677 ins_cost(300);
11678 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11679 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11680 %}
11681 ins_encode %{
11682 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11683 %}
11684 ins_pipe(pipe_slow);
11685 %}
11686
11687 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11688 rFlagsReg cr)
11689 %{
11690 match(Set rdx (ModI rax div));
11691 effect(KILL rax, KILL cr);
11692
11693 ins_cost(300); // XXX
11694 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11695 "jne,s normal\n\t"
11696 "xorl rdx, rdx\n\t"
11697 "cmpl $div, -1\n\t"
11698 "je,s done\n"
11699 "normal: cdql\n\t"
11700 "idivl $div\n"
11701 "done:" %}
11702 ins_encode(cdql_enc(div));
11703 ins_pipe(ialu_reg_reg_alu0);
11704 %}
11705
11706 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11707 rFlagsReg cr)
11708 %{
11709 match(Set rdx (ModL rax div));
11710 effect(KILL rax, KILL cr);
11711
11712 ins_cost(300); // XXX
11713 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11714 "cmpq rax, rdx\n\t"
11715 "jne,s normal\n\t"
11716 "xorl rdx, rdx\n\t"
11717 "cmpq $div, -1\n\t"
11718 "je,s done\n"
11719 "normal: cdqq\n\t"
11720 "idivq $div\n"
11721 "done:" %}
11722 ins_encode(cdqq_enc(div));
11723 ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725
11726 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11727 %{
11728 match(Set rdx (UModI rax div));
11729 effect(KILL rax, KILL cr);
11730
11731 ins_cost(300);
11732 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11733 ins_encode %{
11734 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11735 %}
11736 ins_pipe(ialu_reg_reg_alu0);
11737 %}
11738
11739 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11740 %{
11741 match(Set rdx (UModL rax div));
11742 effect(KILL rax, KILL cr);
11743
11744 ins_cost(300);
11745 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11746 ins_encode %{
11747 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11748 %}
11749 ins_pipe(ialu_reg_reg_alu0);
11750 %}
11751
11752 // Integer Shift Instructions
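// Hardware shifts mask the count to 5 bits (6 for 64-bit operands), which
// coincides with Java's shift semantics, so no explicit masking is emitted.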
11753 // Shift Left by one, two, three
11754 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11755 %{
11756 predicate(!UseAPX);
11757 match(Set dst (LShiftI dst shift));
11758 effect(KILL cr);
11759
11760 format %{ "sall $dst, $shift" %}
11761 ins_encode %{
11762 __ sall($dst$$Register, $shift$$constant);
11763 %}
11764 ins_pipe(ialu_reg);
11765 %}
11766
11767 // Shift Left by one, two, three
11768 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11769 %{
11770 predicate(UseAPX);
11771 match(Set dst (LShiftI src shift));
11772 effect(KILL cr);
11773
11774 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11775 ins_encode %{
11776 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11777 %}
11778 ins_pipe(ialu_reg);
11779 %}
11780
11781 // Shift Left by 8-bit immediate
11782 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11783 %{
11784 predicate(!UseAPX);
11785 match(Set dst (LShiftI dst shift));
11786 effect(KILL cr);
11787
11788 format %{ "sall $dst, $shift" %}
11789 ins_encode %{
11790 __ sall($dst$$Register, $shift$$constant);
11791 %}
11792 ins_pipe(ialu_reg);
11793 %}
11794
11795 // Shift Left by 8-bit immediate
11796 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11797 %{
11798 predicate(UseAPX);
11799 match(Set dst (LShiftI src shift));
11800 effect(KILL cr);
11801
11802 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11803 ins_encode %{
11804 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11805 %}
11806 ins_pipe(ialu_reg);
11807 %}
11808
11809 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11810 %{
11811 predicate(UseAPX);
11812 match(Set dst (LShiftI (LoadI src) shift));
11813 effect(KILL cr);
11814
11815 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11816 ins_encode %{
11817 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11818 %}
11819 ins_pipe(ialu_reg);
11820 %}
11821
11822 // Shift Left by 8-bit immediate
11823 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11824 %{
11825 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11826 effect(KILL cr);
11827
11828 format %{ "sall $dst, $shift" %}
11829 ins_encode %{
11830 __ sall($dst$$Address, $shift$$constant);
11831 %}
11832 ins_pipe(ialu_mem_imm);
11833 %}
11834
11835 // Shift Left by variable
11836 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11837 %{
11838 predicate(!VM_Version::supports_bmi2());
11839 match(Set dst (LShiftI dst shift));
11840 effect(KILL cr);
11841
11842 format %{ "sall $dst, $shift" %}
11843 ins_encode %{
11844 __ sall($dst$$Register);
11845 %}
11846 ins_pipe(ialu_reg_reg);
11847 %}
11848
11849 // Shift Left by variable
11850 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11851 %{
11852 predicate(!VM_Version::supports_bmi2());
11853 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11854 effect(KILL cr);
11855
11856 format %{ "sall $dst, $shift" %}
11857 ins_encode %{
11858 __ sall($dst$$Address);
11859 %}
11860 ins_pipe(ialu_mem_reg);
11861 %}
11862
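// BMI2 shlxl/shlxq (and the sarx/shrx forms further down) take the shift
// count in any register and leave the flags untouched, so these variants
// neither force the count into rcx nor kill cr.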
11863 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11864 %{
11865 predicate(VM_Version::supports_bmi2());
11866 match(Set dst (LShiftI src shift));
11867
11868 format %{ "shlxl $dst, $src, $shift" %}
11869 ins_encode %{
11870 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11871 %}
11872 ins_pipe(ialu_reg_reg);
11873 %}
11874
11875 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11876 %{
11877 predicate(VM_Version::supports_bmi2());
11878 match(Set dst (LShiftI (LoadI src) shift));
11879 ins_cost(175);
11880 format %{ "shlxl $dst, $src, $shift" %}
11881 ins_encode %{
11882 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11883 %}
11884 ins_pipe(ialu_reg_mem);
11885 %}
11886
11887 // Arithmetic Shift Right by 8-bit immediate
11888 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11889 %{
11890 predicate(!UseAPX);
11891 match(Set dst (RShiftI dst shift));
11892 effect(KILL cr);
11893
11894 format %{ "sarl $dst, $shift" %}
11895 ins_encode %{
11896 __ sarl($dst$$Register, $shift$$constant);
11897 %}
11898 ins_pipe(ialu_mem_imm);
11899 %}
11900
11901 // Arithmetic Shift Right by 8-bit immediate
11902 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11903 %{
11904 predicate(UseAPX);
11905 match(Set dst (RShiftI src shift));
11906 effect(KILL cr);
11907
11908 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11909 ins_encode %{
11910 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11911 %}
11912 ins_pipe(ialu_mem_imm);
11913 %}
11914
11915 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11916 %{
11917 predicate(UseAPX);
11918 match(Set dst (RShiftI (LoadI src) shift));
11919 effect(KILL cr);
11920
11921 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11922 ins_encode %{
11923 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11924 %}
11925 ins_pipe(ialu_mem_imm);
11926 %}
11927
11928 // Arithmetic Shift Right by 8-bit immediate
11929 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11930 %{
11931 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11932 effect(KILL cr);
11933
11934 format %{ "sarl $dst, $shift" %}
11935 ins_encode %{
11936 __ sarl($dst$$Address, $shift$$constant);
11937 %}
11938 ins_pipe(ialu_mem_imm);
11939 %}
11940
11941 // Arithmetic Shift Right by variable
11942 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11943 %{
11944 predicate(!VM_Version::supports_bmi2());
11945 match(Set dst (RShiftI dst shift));
11946 effect(KILL cr);
11947
11948 format %{ "sarl $dst, $shift" %}
11949 ins_encode %{
11950 __ sarl($dst$$Register);
11951 %}
11952 ins_pipe(ialu_reg_reg);
11953 %}
11954
11955 // Arithmetic Shift Right by variable
11956 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11957 %{
11958 predicate(!VM_Version::supports_bmi2());
11959 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11960 effect(KILL cr);
11961
11962 format %{ "sarl $dst, $shift" %}
11963 ins_encode %{
11964 __ sarl($dst$$Address);
11965 %}
11966 ins_pipe(ialu_mem_reg);
11967 %}
11968
11969 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11970 %{
11971 predicate(VM_Version::supports_bmi2());
11972 match(Set dst (RShiftI src shift));
11973
11974 format %{ "sarxl $dst, $src, $shift" %}
11975 ins_encode %{
11976 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11977 %}
11978 ins_pipe(ialu_reg_reg);
11979 %}
11980
11981 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11982 %{
11983 predicate(VM_Version::supports_bmi2());
11984 match(Set dst (RShiftI (LoadI src) shift));
11985 ins_cost(175);
11986 format %{ "sarxl $dst, $src, $shift" %}
11987 ins_encode %{
11988 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11989 %}
11990 ins_pipe(ialu_reg_mem);
11991 %}
11992
11993 // Logical Shift Right by 8-bit immediate
11994 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11995 %{
11996 predicate(!UseAPX);
11997 match(Set dst (URShiftI dst shift));
11998 effect(KILL cr);
11999
12000 format %{ "shrl $dst, $shift" %}
12001 ins_encode %{
12002 __ shrl($dst$$Register, $shift$$constant);
12003 %}
12004 ins_pipe(ialu_reg);
12005 %}
12006
12007 // Logical Shift Right by 8-bit immediate
12008 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12009 %{
12010 predicate(UseAPX);
12011 match(Set dst (URShiftI src shift));
12012 effect(KILL cr);
12013
12014 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12015 ins_encode %{
12016 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12017 %}
12018 ins_pipe(ialu_reg);
12019 %}
12020
12021 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12022 %{
12023 predicate(UseAPX);
12024 match(Set dst (URShiftI (LoadI src) shift));
12025 effect(KILL cr);
12026
12027 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12028 ins_encode %{
12029 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12030 %}
12031 ins_pipe(ialu_reg);
12032 %}
12033
12034 // Logical Shift Right by 8-bit immediate
12035 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12036 %{
12037 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12038 effect(KILL cr);
12039
12040 format %{ "shrl $dst, $shift" %}
12041 ins_encode %{
12042 __ shrl($dst$$Address, $shift$$constant);
12043 %}
12044 ins_pipe(ialu_mem_imm);
12045 %}
12046
12047 // Logical Shift Right by variable
12048 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12049 %{
12050 predicate(!VM_Version::supports_bmi2());
12051 match(Set dst (URShiftI dst shift));
12052 effect(KILL cr);
12053
12054 format %{ "shrl $dst, $shift" %}
12055 ins_encode %{
12056 __ shrl($dst$$Register);
12057 %}
12058 ins_pipe(ialu_reg_reg);
12059 %}
12060
12061 // Logical Shift Right by variable
12062 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12063 %{
12064 predicate(!VM_Version::supports_bmi2());
12065 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12066 effect(KILL cr);
12067
12068 format %{ "shrl $dst, $shift" %}
12069 ins_encode %{
12070 __ shrl($dst$$Address);
12071 %}
12072 ins_pipe(ialu_mem_reg);
12073 %}
12074
12075 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12076 %{
12077 predicate(VM_Version::supports_bmi2());
12078 match(Set dst (URShiftI src shift));
12079
12080 format %{ "shrxl $dst, $src, $shift" %}
12081 ins_encode %{
12082 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12083 %}
12084 ins_pipe(ialu_reg_reg);
12085 %}
12086
12087 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12088 %{
12089 predicate(VM_Version::supports_bmi2());
12090 match(Set dst (URShiftI (LoadI src) shift));
12091 ins_cost(175);
12092 format %{ "shrxl $dst, $src, $shift" %}
12093 ins_encode %{
12094 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12095 %}
12096 ins_pipe(ialu_reg_mem);
12097 %}
12098
12099 // Long Shift Instructions
12100 // Shift Left by one, two, three
12101 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12102 %{
12103 predicate(!UseAPX);
12104 match(Set dst (LShiftL dst shift));
12105 effect(KILL cr);
12106
12107 format %{ "salq $dst, $shift" %}
12108 ins_encode %{
12109 __ salq($dst$$Register, $shift$$constant);
12110 %}
12111 ins_pipe(ialu_reg);
12112 %}
12113
12114 // Shift Left by one, two, three
12115 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12116 %{
12117 predicate(UseAPX);
12118 match(Set dst (LShiftL src shift));
12119 effect(KILL cr);
12120
12121 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12122 ins_encode %{
12123 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12124 %}
12125 ins_pipe(ialu_reg);
12126 %}
12127
12128 // Shift Left by 8-bit immediate
12129 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12130 %{
12131 predicate(!UseAPX);
12132 match(Set dst (LShiftL dst shift));
12133 effect(KILL cr);
12134
12135 format %{ "salq $dst, $shift" %}
12136 ins_encode %{
12137 __ salq($dst$$Register, $shift$$constant);
12138 %}
12139 ins_pipe(ialu_reg);
12140 %}
12141
12142 // Shift Left by 8-bit immediate
12143 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12144 %{
12145 predicate(UseAPX);
12146 match(Set dst (LShiftL src shift));
12147 effect(KILL cr);
12148
12149 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12150 ins_encode %{
12151 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12152 %}
12153 ins_pipe(ialu_reg);
12154 %}
12155
12156 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12157 %{
12158 predicate(UseAPX);
12159 match(Set dst (LShiftL (LoadL src) shift));
12160 effect(KILL cr);
12161
12162 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12163 ins_encode %{
12164 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12165 %}
12166 ins_pipe(ialu_reg);
12167 %}
12168
12169 // Shift Left by 8-bit immediate
12170 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12171 %{
12172 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12173 effect(KILL cr);
12174
12175 format %{ "salq $dst, $shift" %}
12176 ins_encode %{
12177 __ salq($dst$$Address, $shift$$constant);
12178 %}
12179 ins_pipe(ialu_mem_imm);
12180 %}
12181
12182 // Shift Left by variable
12183 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12184 %{
12185 predicate(!VM_Version::supports_bmi2());
12186 match(Set dst (LShiftL dst shift));
12187 effect(KILL cr);
12188
12189 format %{ "salq $dst, $shift" %}
12190 ins_encode %{
12191 __ salq($dst$$Register);
12192 %}
12193 ins_pipe(ialu_reg_reg);
12194 %}
12195
12196 // Shift Left by variable
12197 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12198 %{
12199 predicate(!VM_Version::supports_bmi2());
12200 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12201 effect(KILL cr);
12202
12203 format %{ "salq $dst, $shift" %}
12204 ins_encode %{
12205 __ salq($dst$$Address);
12206 %}
12207 ins_pipe(ialu_mem_reg);
12208 %}
12209
12210 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12211 %{
12212 predicate(VM_Version::supports_bmi2());
12213 match(Set dst (LShiftL src shift));
12214
12215 format %{ "shlxq $dst, $src, $shift" %}
12216 ins_encode %{
12217 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12218 %}
12219 ins_pipe(ialu_reg_reg);
12220 %}
12221
12222 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12223 %{
12224 predicate(VM_Version::supports_bmi2());
12225 match(Set dst (LShiftL (LoadL src) shift));
12226 ins_cost(175);
12227 format %{ "shlxq $dst, $src, $shift" %}
12228 ins_encode %{
12229 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12230 %}
12231 ins_pipe(ialu_reg_mem);
12232 %}
12233
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12235 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12236 %{
12237 predicate(!UseAPX);
12238 match(Set dst (RShiftL dst shift));
12239 effect(KILL cr);
12240
12241 format %{ "sarq $dst, $shift" %}
12242 ins_encode %{
12243 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12244 %}
ins_pipe(ialu_reg);
12246 %}
12247
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12249 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12250 %{
12251 predicate(UseAPX);
12252 match(Set dst (RShiftL src shift));
12253 effect(KILL cr);
12254
12255 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12256 ins_encode %{
12257 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12258 %}
ins_pipe(ialu_reg);
12260 %}
12261
12262 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12263 %{
12264 predicate(UseAPX);
12265 match(Set dst (RShiftL (LoadL src) shift));
12266 effect(KILL cr);
12267
12268 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12269 ins_encode %{
12270 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12271 %}
ins_pipe(ialu_reg);
12273 %}
12274
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12276 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12277 %{
12278 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12279 effect(KILL cr);
12280
12281 format %{ "sarq $dst, $shift" %}
12282 ins_encode %{
12283 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12284 %}
12285 ins_pipe(ialu_mem_imm);
12286 %}
12287
12288 // Arithmetic Shift Right by variable
12289 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12290 %{
12291 predicate(!VM_Version::supports_bmi2());
12292 match(Set dst (RShiftL dst shift));
12293 effect(KILL cr);
12294
12295 format %{ "sarq $dst, $shift" %}
12296 ins_encode %{
12297 __ sarq($dst$$Register);
12298 %}
12299 ins_pipe(ialu_reg_reg);
12300 %}
12301
12302 // Arithmetic Shift Right by variable
12303 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12304 %{
12305 predicate(!VM_Version::supports_bmi2());
12306 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12307 effect(KILL cr);
12308
12309 format %{ "sarq $dst, $shift" %}
12310 ins_encode %{
12311 __ sarq($dst$$Address);
12312 %}
12313 ins_pipe(ialu_mem_reg);
12314 %}
12315
12316 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12317 %{
12318 predicate(VM_Version::supports_bmi2());
12319 match(Set dst (RShiftL src shift));
12320
12321 format %{ "sarxq $dst, $src, $shift" %}
12322 ins_encode %{
12323 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12324 %}
12325 ins_pipe(ialu_reg_reg);
12326 %}
12327
12328 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12329 %{
12330 predicate(VM_Version::supports_bmi2());
12331 match(Set dst (RShiftL (LoadL src) shift));
12332 ins_cost(175);
12333 format %{ "sarxq $dst, $src, $shift" %}
12334 ins_encode %{
12335 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12336 %}
12337 ins_pipe(ialu_reg_mem);
12338 %}
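// Like legacy sar, sarx shifts in copies of the sign bit, and the hardware
// masks 64-bit shift counts to 6 bits, which lines up with Java's >> on long.
// A minimal C++ sketch (illustrative only; note that arithmetic right shift
// of signed values is guaranteed by the standard only since C++20):
//
//   #include <cstdint>
//   static inline int64_t rshift64(int64_t x, uint32_t s) {
//     return x >> (s & 63);   // sign bit replicated into vacated positions
//   }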
12339
12340 // Logical Shift Right by 8-bit immediate
12341 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12342 %{
12343 predicate(!UseAPX);
12344 match(Set dst (URShiftL dst shift));
12345 effect(KILL cr);
12346
12347 format %{ "shrq $dst, $shift" %}
12348 ins_encode %{
12349 __ shrq($dst$$Register, $shift$$constant);
12350 %}
12351 ins_pipe(ialu_reg);
12352 %}
12353
12354 // Logical Shift Right by 8-bit immediate
12355 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12356 %{
12357 predicate(UseAPX);
12358 match(Set dst (URShiftL src shift));
12359 effect(KILL cr);
12360
12361 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12362 ins_encode %{
12363 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12364 %}
12365 ins_pipe(ialu_reg);
12366 %}
12367
12368 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12369 %{
12370 predicate(UseAPX);
12371 match(Set dst (URShiftL (LoadL src) shift));
12372 effect(KILL cr);
12373
12374 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12375 ins_encode %{
12376 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12377 %}
12378 ins_pipe(ialu_reg);
12379 %}
12380
12381 // Logical Shift Right by 8-bit immediate
12382 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12383 %{
12384 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12385 effect(KILL cr);
12386
12387 format %{ "shrq $dst, $shift" %}
12388 ins_encode %{
12389 __ shrq($dst$$Address, $shift$$constant);
12390 %}
12391 ins_pipe(ialu_mem_imm);
12392 %}
12393
12394 // Logical Shift Right by variable
12395 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12396 %{
12397 predicate(!VM_Version::supports_bmi2());
12398 match(Set dst (URShiftL dst shift));
12399 effect(KILL cr);
12400
12401 format %{ "shrq $dst, $shift" %}
12402 ins_encode %{
12403 __ shrq($dst$$Register);
12404 %}
12405 ins_pipe(ialu_reg_reg);
12406 %}
12407
12408 // Logical Shift Right by variable
12409 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12410 %{
12411 predicate(!VM_Version::supports_bmi2());
12412 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12413 effect(KILL cr);
12414
12415 format %{ "shrq $dst, $shift" %}
12416 ins_encode %{
12417 __ shrq($dst$$Address);
12418 %}
12419 ins_pipe(ialu_mem_reg);
12420 %}
12421
12422 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12423 %{
12424 predicate(VM_Version::supports_bmi2());
12425 match(Set dst (URShiftL src shift));
12426
12427 format %{ "shrxq $dst, $src, $shift" %}
12428 ins_encode %{
12429 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12430 %}
12431 ins_pipe(ialu_reg_reg);
12432 %}
12433
12434 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12435 %{
12436 predicate(VM_Version::supports_bmi2());
12437 match(Set dst (URShiftL (LoadL src) shift));
12438 ins_cost(175);
12439 format %{ "shrxq $dst, $src, $shift" %}
12440 ins_encode %{
12441 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12442 %}
12443 ins_pipe(ialu_reg_mem);
12444 %}
12445
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12448 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12449 %{
12450 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12451
12452 format %{ "movsbl $dst, $src\t# i2b" %}
12453 ins_encode %{
12454 __ movsbl($dst$$Register, $src$$Register);
12455 %}
12456 ins_pipe(ialu_reg_reg);
12457 %}
12458
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12461 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12462 %{
12463 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12464
12465 format %{ "movswl $dst, $src\t# i2s" %}
12466 ins_encode %{
12467 __ movswl($dst$$Register, $src$$Register);
12468 %}
12469 ins_pipe(ialu_reg_reg);
12470 %}
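// Both idioms above are sign extensions in disguise: shifting left and then
// arithmetically right by 24 (or 16) keeps only the low byte (or word) and
// replicates its sign bit, which movsbl/movswl compute directly. A minimal
// C++ sketch of the equivalence (illustrative only; helper names made up):
//
//   #include <cstdint>
//   static inline int32_t i2b_ref(int32_t x) {
//     return (int32_t)(int8_t)x;    // == (x << 24) >> 24 in Java semantics
//   }
//   static inline int32_t i2s_ref(int32_t x) {
//     return (int32_t)(int16_t)x;   // == (x << 16) >> 16 in Java semantics
//   }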
12471
12472 // ROL/ROR instructions
12473
12474 // Rotate left by constant.
12475 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12476 %{
12477 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12478 match(Set dst (RotateLeft dst shift));
12479 effect(KILL cr);
12480 format %{ "roll $dst, $shift" %}
12481 ins_encode %{
12482 __ roll($dst$$Register, $shift$$constant);
12483 %}
12484 ins_pipe(ialu_reg);
12485 %}
12486
12487 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12488 %{
12489 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12490 match(Set dst (RotateLeft src shift));
12491 format %{ "rolxl $dst, $src, $shift" %}
12492 ins_encode %{
12493 int shift = 32 - ($shift$$constant & 31);
12494 __ rorxl($dst$$Register, $src$$Register, shift);
12495 %}
12496 ins_pipe(ialu_reg_reg);
12497 %}
12498
12499 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12500 %{
12501 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12502 match(Set dst (RotateLeft (LoadI src) shift));
12503 ins_cost(175);
12504 format %{ "rolxl $dst, $src, $shift" %}
12505 ins_encode %{
12506 int shift = 32 - ($shift$$constant & 31);
12507 __ rorxl($dst$$Register, $src$$Address, shift);
12508 %}
12509 ins_pipe(ialu_reg_mem);
12510 %}
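// There is no "rolx" instruction; the two BMI2 rules above synthesize a
// rotate-left from RORX using rol(x, s) == ror(x, 32 - s) (counts mod 32).
// The long rotate rules further down do the same with 64 - s. A minimal C++
// sketch of the identity (illustrative only; helper names made up):
//
//   #include <cstdint>
//   static inline uint32_t rotl32(uint32_t x, uint32_t s) {
//     s &= 31;
//     return (x << s) | (x >> ((32 - s) & 31));
//   }
//   static inline uint32_t rotl_via_rotr(uint32_t x, uint32_t s) {
//     uint32_t t = (32 - (s & 31)) & 31;          // complemented count
//     return (x >> t) | (x << ((32 - t) & 31));   // == rotl32(x, s)
//   }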
12511
12512 // Rotate Left by variable
12513 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12514 %{
12515 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12516 match(Set dst (RotateLeft dst shift));
12517 effect(KILL cr);
12518 format %{ "roll $dst, $shift" %}
12519 ins_encode %{
12520 __ roll($dst$$Register);
12521 %}
12522 ins_pipe(ialu_reg_reg);
12523 %}
12524
12525 // Rotate Left by variable
12526 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12527 %{
12528 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12529 match(Set dst (RotateLeft src shift));
12530 effect(KILL cr);
12531
12532 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12533 ins_encode %{
__ eroll($dst$$Register, $src$$Register, false); // rotate count is implicitly in CL (rcx)
12535 %}
12536 ins_pipe(ialu_reg_reg);
12537 %}
12538
12539 // Rotate Right by constant.
12540 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12541 %{
12542 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12543 match(Set dst (RotateRight dst shift));
12544 effect(KILL cr);
12545 format %{ "rorl $dst, $shift" %}
12546 ins_encode %{
12547 __ rorl($dst$$Register, $shift$$constant);
12548 %}
12549 ins_pipe(ialu_reg);
12550 %}
12551
12552 // Rotate Right by constant.
12553 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12554 %{
12555 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12556 match(Set dst (RotateRight src shift));
12557 format %{ "rorxl $dst, $src, $shift" %}
12558 ins_encode %{
12559 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12560 %}
12561 ins_pipe(ialu_reg_reg);
12562 %}
12563
12564 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12565 %{
12566 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12567 match(Set dst (RotateRight (LoadI src) shift));
12568 ins_cost(175);
12569 format %{ "rorxl $dst, $src, $shift" %}
12570 ins_encode %{
12571 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12572 %}
12573 ins_pipe(ialu_reg_mem);
12574 %}
12575
12576 // Rotate Right by variable
12577 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12578 %{
12579 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12580 match(Set dst (RotateRight dst shift));
12581 effect(KILL cr);
12582 format %{ "rorl $dst, $shift" %}
12583 ins_encode %{
12584 __ rorl($dst$$Register);
12585 %}
12586 ins_pipe(ialu_reg_reg);
12587 %}
12588
12589 // Rotate Right by variable
12590 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12591 %{
12592 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12593 match(Set dst (RotateRight src shift));
12594 effect(KILL cr);
12595
format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12597 ins_encode %{
12598 __ erorl($dst$$Register, $src$$Register, false);
12599 %}
12600 ins_pipe(ialu_reg_reg);
12601 %}
12602
12603 // Rotate Left by constant.
12604 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12605 %{
12606 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12607 match(Set dst (RotateLeft dst shift));
12608 effect(KILL cr);
12609 format %{ "rolq $dst, $shift" %}
12610 ins_encode %{
12611 __ rolq($dst$$Register, $shift$$constant);
12612 %}
12613 ins_pipe(ialu_reg);
12614 %}
12615
12616 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12617 %{
12618 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12619 match(Set dst (RotateLeft src shift));
12620 format %{ "rolxq $dst, $src, $shift" %}
12621 ins_encode %{
12622 int shift = 64 - ($shift$$constant & 63);
12623 __ rorxq($dst$$Register, $src$$Register, shift);
12624 %}
12625 ins_pipe(ialu_reg_reg);
12626 %}
12627
12628 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12629 %{
12630 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12631 match(Set dst (RotateLeft (LoadL src) shift));
12632 ins_cost(175);
12633 format %{ "rolxq $dst, $src, $shift" %}
12634 ins_encode %{
12635 int shift = 64 - ($shift$$constant & 63);
12636 __ rorxq($dst$$Register, $src$$Address, shift);
12637 %}
12638 ins_pipe(ialu_reg_mem);
12639 %}
12640
12641 // Rotate Left by variable
12642 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12643 %{
12644 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12645 match(Set dst (RotateLeft dst shift));
12646 effect(KILL cr);
12647 format %{ "rolq $dst, $shift" %}
12648 ins_encode %{
12649 __ rolq($dst$$Register);
12650 %}
12651 ins_pipe(ialu_reg_reg);
12652 %}
12653
12654 // Rotate Left by variable
12655 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12656 %{
12657 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12658 match(Set dst (RotateLeft src shift));
12659 effect(KILL cr);
12660
format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12662 ins_encode %{
12663 __ erolq($dst$$Register, $src$$Register, false);
12664 %}
12665 ins_pipe(ialu_reg_reg);
12666 %}
12667
12668 // Rotate Right by constant.
12669 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12670 %{
12671 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12672 match(Set dst (RotateRight dst shift));
12673 effect(KILL cr);
12674 format %{ "rorq $dst, $shift" %}
12675 ins_encode %{
12676 __ rorq($dst$$Register, $shift$$constant);
12677 %}
12678 ins_pipe(ialu_reg);
12679 %}
12680
12681 // Rotate Right by constant
12682 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12683 %{
12684 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12685 match(Set dst (RotateRight src shift));
12686 format %{ "rorxq $dst, $src, $shift" %}
12687 ins_encode %{
12688 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12689 %}
12690 ins_pipe(ialu_reg_reg);
12691 %}
12692
12693 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12694 %{
12695 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12696 match(Set dst (RotateRight (LoadL src) shift));
12697 ins_cost(175);
12698 format %{ "rorxq $dst, $src, $shift" %}
12699 ins_encode %{
12700 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12701 %}
12702 ins_pipe(ialu_reg_mem);
12703 %}
12704
12705 // Rotate Right by variable
12706 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12707 %{
12708 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12709 match(Set dst (RotateRight dst shift));
12710 effect(KILL cr);
12711 format %{ "rorq $dst, $shift" %}
12712 ins_encode %{
12713 __ rorq($dst$$Register);
12714 %}
12715 ins_pipe(ialu_reg_reg);
12716 %}
12717
12718 // Rotate Right by variable
12719 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12720 %{
12721 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12722 match(Set dst (RotateRight src shift));
12723 effect(KILL cr);
12724
format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12726 ins_encode %{
12727 __ erorq($dst$$Register, $src$$Register, false);
12728 %}
12729 ins_pipe(ialu_reg_reg);
12730 %}
12731
12732 //----------------------------- CompressBits/ExpandBits ------------------------
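// CompressBits maps to PEXT (gather the src bits selected by mask into the
// low end of dst); ExpandBits maps to PDEP (scatter the low bits of src to
// the positions set in mask). A bit-at-a-time C++ reference sketch of the
// semantics (illustrative only; the hardware does this in one instruction):
//
//   #include <cstdint>
//   static inline uint64_t pext64_ref(uint64_t src, uint64_t mask) {
//     uint64_t dst = 0;
//     for (uint64_t out = 1; mask != 0; mask &= mask - 1, out <<= 1) {
//       if (src & (mask & (0 - mask))) dst |= out;  // lowest set mask bit
//     }
//     return dst;
//   }
//   static inline uint64_t pdep64_ref(uint64_t src, uint64_t mask) {
//     uint64_t dst = 0;
//     for (uint64_t in = 1; mask != 0; mask &= mask - 1, in <<= 1) {
//       if (src & in) dst |= mask & (0 - mask);     // lowest set mask bit
//     }
//     return dst;
//   }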
12733
12734 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12735 predicate(n->bottom_type()->isa_long());
12736 match(Set dst (CompressBits src mask));
12737 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12738 ins_encode %{
12739 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12740 %}
12741 ins_pipe( pipe_slow );
12742 %}
12743
12744 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12745 predicate(n->bottom_type()->isa_long());
12746 match(Set dst (ExpandBits src mask));
12747 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12748 ins_encode %{
12749 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12750 %}
12751 ins_pipe( pipe_slow );
12752 %}
12753
12754 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12755 predicate(n->bottom_type()->isa_long());
12756 match(Set dst (CompressBits src (LoadL mask)));
12757 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12758 ins_encode %{
12759 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12760 %}
12761 ins_pipe( pipe_slow );
12762 %}
12763
12764 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12765 predicate(n->bottom_type()->isa_long());
12766 match(Set dst (ExpandBits src (LoadL mask)));
12767 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12768 ins_encode %{
12769 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12770 %}
12771 ins_pipe( pipe_slow );
12772 %}
12773
12774
12775 // Logical Instructions
12776
12777 // Integer Logical Instructions
12778
12779 // And Instructions
12780 // And Register with Register
12781 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12782 %{
12783 predicate(!UseAPX);
12784 match(Set dst (AndI dst src));
12785 effect(KILL cr);
12786 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12787
12788 format %{ "andl $dst, $src\t# int" %}
12789 ins_encode %{
12790 __ andl($dst$$Register, $src$$Register);
12791 %}
12792 ins_pipe(ialu_reg_reg);
12793 %}
12794
12795 // And Register with Register using New Data Destination (NDD)
12796 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12797 %{
12798 predicate(UseAPX);
12799 match(Set dst (AndI src1 src2));
12800 effect(KILL cr);
12801 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12802
12803 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12804 ins_encode %{
__ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
12808 ins_pipe(ialu_reg_reg);
12809 %}
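// The NDD (New Data Destination) forms are three-address: the destination is
// encoded independently of both sources, so the register allocator does not
// have to copy src1 into dst first, as it must for the two-address legacy
// rule above. A minimal C++ analogy (illustrative only; helper names made up):
//
//   #include <cstdint>
//   // two-address (legacy andl): the destination is also the first source
//   static inline void and2(uint32_t& dst, uint32_t src) { dst &= src; }
//   // three-address (eandl): destination independent of both sources
//   static inline uint32_t and3(uint32_t a, uint32_t b) { return a & b; }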
12810
12811 // And Register with Immediate 255
12812 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12813 %{
12814 match(Set dst (AndI src mask));
12815
12816 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12817 ins_encode %{
12818 __ movzbl($dst$$Register, $src$$Register);
12819 %}
12820 ins_pipe(ialu_reg);
12821 %}
12822
12823 // And Register with Immediate 255 and promote to long
12824 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12825 %{
12826 match(Set dst (ConvI2L (AndI src mask)));
12827
12828 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12829 ins_encode %{
12830 __ movzbl($dst$$Register, $src$$Register);
12831 %}
12832 ins_pipe(ialu_reg);
12833 %}
12834
12835 // And Register with Immediate 65535
12836 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12837 %{
12838 match(Set dst (AndI src mask));
12839
12840 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12841 ins_encode %{
12842 __ movzwl($dst$$Register, $src$$Register);
12843 %}
12844 ins_pipe(ialu_reg);
12845 %}
12846
12847 // And Register with Immediate 65535 and promote to long
12848 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12849 %{
12850 match(Set dst (ConvI2L (AndI src mask)));
12851
12852 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12853 ins_encode %{
12854 __ movzwl($dst$$Register, $src$$Register);
12855 %}
12856 ins_pipe(ialu_reg);
12857 %}
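// AND with 0xFF or 0xFFFF is just a zero extension, so the four rules above
// emit movzbl/movzwl instead of andl, which also leaves the flags untouched
// (hence no KILL cr). A minimal C++ sketch (illustrative only):
//
//   #include <cstdint>
//   static inline uint32_t and_255(uint32_t x)   { return (uint8_t)x;  }  // == x & 0xFF
//   static inline uint32_t and_65535(uint32_t x) { return (uint16_t)x; }  // == x & 0xFFFF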
12858
12859 // Can skip int2long conversions after AND with small bitmask
12860 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12861 %{
predicate(VM_Version::supports_bmi2());
match(Set dst (ConvI2L (AndI src mask)));
ins_cost(125);
effect(TEMP tmp, KILL cr);
format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12867 ins_encode %{
12868 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12869 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12870 %}
12871 ins_pipe(ialu_reg_reg);
12872 %}
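// BZHI copies src to dst and clears all bits at position index and above, so
// an AND with a mask of the form 2^k - 1 becomes a single instruction once k
// is loaded into the temp register. A minimal C++ sketch (illustrative only;
// the helper name is made up):
//
//   #include <cstdint>
//   static inline uint64_t bzhi64_ref(uint64_t src, uint32_t index) {
//     return index >= 64 ? src : (src & ((uint64_t{1} << index) - 1));
//   }
//   // e.g. mask == 0xFF: tmp gets exact_log2(0xFF + 1) == 8, and
//   // bzhi64_ref(src, 8) == src & 0xFF, already zero-extended to 64 bits.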
12873
12874 // And Register with Immediate
12875 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12876 %{
12877 predicate(!UseAPX);
12878 match(Set dst (AndI dst src));
12879 effect(KILL cr);
12880 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12881
12882 format %{ "andl $dst, $src\t# int" %}
12883 ins_encode %{
12884 __ andl($dst$$Register, $src$$constant);
12885 %}
12886 ins_pipe(ialu_reg);
12887 %}
12888
12889 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12890 %{
12891 predicate(UseAPX);
12892 match(Set dst (AndI src1 src2));
12893 effect(KILL cr);
12894 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12895
12896 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12897 ins_encode %{
12898 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12899 %}
12900 ins_pipe(ialu_reg);
12901 %}
12902
12903 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12904 %{
12905 predicate(UseAPX);
12906 match(Set dst (AndI (LoadI src1) src2));
12907 effect(KILL cr);
12908 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12909
12910 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12911 ins_encode %{
12912 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12913 %}
12914 ins_pipe(ialu_reg);
12915 %}
12916
12917 // And Register with Memory
12918 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12919 %{
12920 predicate(!UseAPX);
12921 match(Set dst (AndI dst (LoadI src)));
12922 effect(KILL cr);
12923 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12924
12925 ins_cost(150);
12926 format %{ "andl $dst, $src\t# int" %}
12927 ins_encode %{
12928 __ andl($dst$$Register, $src$$Address);
12929 %}
12930 ins_pipe(ialu_reg_mem);
12931 %}
12932
12933 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12934 %{
12935 predicate(UseAPX);
12936 match(Set dst (AndI src1 (LoadI src2)));
12937 effect(KILL cr);
12938 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12939
12940 ins_cost(150);
12941 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12942 ins_encode %{
12943 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12944 %}
12945 ins_pipe(ialu_reg_mem);
12946 %}
12947
12948 // And Memory with Register
12949 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12950 %{
12951 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12952 effect(KILL cr);
12953 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12954
12955 ins_cost(150);
12956 format %{ "andb $dst, $src\t# byte" %}
12957 ins_encode %{
12958 __ andb($dst$$Address, $src$$Register);
12959 %}
12960 ins_pipe(ialu_mem_reg);
12961 %}
12962
12963 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12964 %{
12965 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12966 effect(KILL cr);
12967 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12968
12969 ins_cost(150);
12970 format %{ "andl $dst, $src\t# int" %}
12971 ins_encode %{
12972 __ andl($dst$$Address, $src$$Register);
12973 %}
12974 ins_pipe(ialu_mem_reg);
12975 %}
12976
12977 // And Memory with Immediate
12978 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12979 %{
12980 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12981 effect(KILL cr);
12982 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12983
12984 ins_cost(125);
12985 format %{ "andl $dst, $src\t# int" %}
12986 ins_encode %{
12987 __ andl($dst$$Address, $src$$constant);
12988 %}
12989 ins_pipe(ialu_mem_imm);
12990 %}
12991
12992 // BMI1 instructions
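// The BMI1 rules below pattern-match the classic bit tricks and collapse each
// into a single flag-setting instruction: andn = ~a & b, blsi = x & -x
// (isolate lowest set bit), blsmsk = x ^ (x - 1) (mask through lowest set
// bit), blsr = x & (x - 1) (clear lowest set bit). A minimal C++ sketch of
// the identities (illustrative only; helper names made up):
//
//   #include <cstdint>
//   static inline uint32_t andn32(uint32_t a, uint32_t b) { return ~a & b; }
//   static inline uint32_t blsi32(uint32_t x)   { return x & (0u - x); }
//   static inline uint32_t blsmsk32(uint32_t x) { return x ^ (x - 1u); }
//   static inline uint32_t blsr32(uint32_t x)   { return x & (x - 1u); }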
12993 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
12994 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
12995 predicate(UseBMI1Instructions);
12996 effect(KILL cr);
12997 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12998
12999 ins_cost(125);
13000 format %{ "andnl $dst, $src1, $src2" %}
13001
13002 ins_encode %{
13003 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13004 %}
13005 ins_pipe(ialu_reg_mem);
13006 %}
13007
13008 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13009 match(Set dst (AndI (XorI src1 minus_1) src2));
13010 predicate(UseBMI1Instructions);
13011 effect(KILL cr);
13012 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13013
13014 format %{ "andnl $dst, $src1, $src2" %}
13015
13016 ins_encode %{
13017 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13018 %}
13019 ins_pipe(ialu_reg);
13020 %}
13021
13022 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13023 match(Set dst (AndI (SubI imm_zero src) src));
13024 predicate(UseBMI1Instructions);
13025 effect(KILL cr);
13026 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13027
13028 format %{ "blsil $dst, $src" %}
13029
13030 ins_encode %{
13031 __ blsil($dst$$Register, $src$$Register);
13032 %}
13033 ins_pipe(ialu_reg);
13034 %}
13035
13036 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13037 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13038 predicate(UseBMI1Instructions);
13039 effect(KILL cr);
13040 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13041
13042 ins_cost(125);
13043 format %{ "blsil $dst, $src" %}
13044
13045 ins_encode %{
13046 __ blsil($dst$$Register, $src$$Address);
13047 %}
13048 ins_pipe(ialu_reg_mem);
13049 %}
13050
13051 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13052 %{
13053 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13054 predicate(UseBMI1Instructions);
13055 effect(KILL cr);
13056 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13057
13058 ins_cost(125);
13059 format %{ "blsmskl $dst, $src" %}
13060
13061 ins_encode %{
13062 __ blsmskl($dst$$Register, $src$$Address);
13063 %}
13064 ins_pipe(ialu_reg_mem);
13065 %}
13066
13067 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13068 %{
13069 match(Set dst (XorI (AddI src minus_1) src));
13070 predicate(UseBMI1Instructions);
13071 effect(KILL cr);
13072 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13073
13074 format %{ "blsmskl $dst, $src" %}
13075
13076 ins_encode %{
13077 __ blsmskl($dst$$Register, $src$$Register);
13078 %}
13079
13080 ins_pipe(ialu_reg);
13081 %}
13082
13083 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13084 %{
13085 match(Set dst (AndI (AddI src minus_1) src) );
13086 predicate(UseBMI1Instructions);
13087 effect(KILL cr);
13088 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13089
13090 format %{ "blsrl $dst, $src" %}
13091
13092 ins_encode %{
13093 __ blsrl($dst$$Register, $src$$Register);
13094 %}
13095
ins_pipe(ialu_reg);
13097 %}
13098
13099 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13100 %{
13101 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13102 predicate(UseBMI1Instructions);
13103 effect(KILL cr);
13104 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13105
13106 ins_cost(125);
13107 format %{ "blsrl $dst, $src" %}
13108
13109 ins_encode %{
13110 __ blsrl($dst$$Register, $src$$Address);
13111 %}
13112
ins_pipe(ialu_reg_mem);
13114 %}
13115
13116 // Or Instructions
13117 // Or Register with Register
13118 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13119 %{
13120 predicate(!UseAPX);
13121 match(Set dst (OrI dst src));
13122 effect(KILL cr);
13123 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13124
13125 format %{ "orl $dst, $src\t# int" %}
13126 ins_encode %{
13127 __ orl($dst$$Register, $src$$Register);
13128 %}
13129 ins_pipe(ialu_reg_reg);
13130 %}
13131
13132 // Or Register with Register using New Data Destination (NDD)
13133 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13134 %{
13135 predicate(UseAPX);
13136 match(Set dst (OrI src1 src2));
13137 effect(KILL cr);
13138 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13139
13140 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13141 ins_encode %{
13142 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13143 %}
13144 ins_pipe(ialu_reg_reg);
13145 %}
13146
13147 // Or Register with Immediate
13148 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13149 %{
13150 predicate(!UseAPX);
13151 match(Set dst (OrI dst src));
13152 effect(KILL cr);
13153 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13154
13155 format %{ "orl $dst, $src\t# int" %}
13156 ins_encode %{
13157 __ orl($dst$$Register, $src$$constant);
13158 %}
13159 ins_pipe(ialu_reg);
13160 %}
13161
13162 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13163 %{
13164 predicate(UseAPX);
13165 match(Set dst (OrI src1 src2));
13166 effect(KILL cr);
13167 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13168
13169 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13170 ins_encode %{
13171 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13172 %}
13173 ins_pipe(ialu_reg);
13174 %}
13175
13176 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13177 %{
13178 predicate(UseAPX);
13179 match(Set dst (OrI src1 src2));
13180 effect(KILL cr);
13181 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182
13183 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13184 ins_encode %{
13185 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13186 %}
13187 ins_pipe(ialu_reg);
13188 %}
13189
13190 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13191 %{
13192 predicate(UseAPX);
13193 match(Set dst (OrI (LoadI src1) src2));
13194 effect(KILL cr);
13195 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13196
13197 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13198 ins_encode %{
13199 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13200 %}
13201 ins_pipe(ialu_reg);
13202 %}
13203
13204 // Or Register with Memory
13205 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13206 %{
13207 predicate(!UseAPX);
13208 match(Set dst (OrI dst (LoadI src)));
13209 effect(KILL cr);
13210 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13211
13212 ins_cost(150);
13213 format %{ "orl $dst, $src\t# int" %}
13214 ins_encode %{
13215 __ orl($dst$$Register, $src$$Address);
13216 %}
13217 ins_pipe(ialu_reg_mem);
13218 %}
13219
13220 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13221 %{
13222 predicate(UseAPX);
13223 match(Set dst (OrI src1 (LoadI src2)));
13224 effect(KILL cr);
13225 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13226
13227 ins_cost(150);
13228 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13229 ins_encode %{
13230 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13231 %}
13232 ins_pipe(ialu_reg_mem);
13233 %}
13234
13235 // Or Memory with Register
13236 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13237 %{
13238 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13239 effect(KILL cr);
13240 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13241
13242 ins_cost(150);
13243 format %{ "orb $dst, $src\t# byte" %}
13244 ins_encode %{
13245 __ orb($dst$$Address, $src$$Register);
13246 %}
13247 ins_pipe(ialu_mem_reg);
13248 %}
13249
13250 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13251 %{
13252 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13253 effect(KILL cr);
13254 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13255
13256 ins_cost(150);
13257 format %{ "orl $dst, $src\t# int" %}
13258 ins_encode %{
13259 __ orl($dst$$Address, $src$$Register);
13260 %}
13261 ins_pipe(ialu_mem_reg);
13262 %}
13263
13264 // Or Memory with Immediate
13265 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13266 %{
13267 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13268 effect(KILL cr);
13269 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13270
13271 ins_cost(125);
13272 format %{ "orl $dst, $src\t# int" %}
13273 ins_encode %{
13274 __ orl($dst$$Address, $src$$constant);
13275 %}
13276 ins_pipe(ialu_mem_imm);
13277 %}
13278
13279 // Xor Instructions
13280 // Xor Register with Register
13281 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13282 %{
13283 predicate(!UseAPX);
13284 match(Set dst (XorI dst src));
13285 effect(KILL cr);
13286 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13287
13288 format %{ "xorl $dst, $src\t# int" %}
13289 ins_encode %{
13290 __ xorl($dst$$Register, $src$$Register);
13291 %}
13292 ins_pipe(ialu_reg_reg);
13293 %}
13294
13295 // Xor Register with Register using New Data Destination (NDD)
13296 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13297 %{
13298 predicate(UseAPX);
13299 match(Set dst (XorI src1 src2));
13300 effect(KILL cr);
13301 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13302
13303 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13304 ins_encode %{
13305 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13306 %}
13307 ins_pipe(ialu_reg_reg);
13308 %}
13309
13310 // Xor Register with Immediate -1
13311 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13312 %{
13313 predicate(!UseAPX);
13314 match(Set dst (XorI dst imm));
13315
13316 format %{ "notl $dst" %}
13317 ins_encode %{
13318 __ notl($dst$$Register);
13319 %}
13320 ins_pipe(ialu_reg);
13321 %}
13322
13323 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13324 %{
13325 match(Set dst (XorI src imm));
13326 predicate(UseAPX);
13327
13328 format %{ "enotl $dst, $src" %}
13329 ins_encode %{
13330 __ enotl($dst$$Register, $src$$Register);
13331 %}
13332 ins_pipe(ialu_reg);
13333 %}
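// x ^ -1 == ~x, and x86 NOT writes no flags, which is why the two rules above
// need no KILL(cr) effect. A minimal C++ sketch (illustrative only):
//
//   #include <cstdint>
//   static inline uint32_t xor_minus1(uint32_t x) { return x ^ 0xFFFFFFFFu; }  // == ~x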
13334
13335 // Xor Register with Immediate
13336 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13337 %{
// Strict predicate check: exclude src == -1 here so that xorI_rReg_im1 is selected for that case regardless of relative cost.
13339 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13340 match(Set dst (XorI dst src));
13341 effect(KILL cr);
13342 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13343
13344 format %{ "xorl $dst, $src\t# int" %}
13345 ins_encode %{
13346 __ xorl($dst$$Register, $src$$constant);
13347 %}
13348 ins_pipe(ialu_reg);
13349 %}
13350
13351 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13352 %{
// Strict predicate check: exclude src2 == -1 here so that xorI_rReg_im1_ndd is selected for that case regardless of relative cost.
13354 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13355 match(Set dst (XorI src1 src2));
13356 effect(KILL cr);
13357 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13358
13359 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13360 ins_encode %{
13361 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13362 %}
13363 ins_pipe(ialu_reg);
13364 %}
13365
// Xor Memory with Immediate into Register (NDD)
13367 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13368 %{
13369 predicate(UseAPX);
13370 match(Set dst (XorI (LoadI src1) src2));
13371 effect(KILL cr);
13372 ins_cost(150);
13373 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13374
13375 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13376 ins_encode %{
13377 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13378 %}
13379 ins_pipe(ialu_reg);
13380 %}
13381
13382 // Xor Register with Memory
13383 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13384 %{
13385 predicate(!UseAPX);
13386 match(Set dst (XorI dst (LoadI src)));
13387 effect(KILL cr);
13388 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13389
13390 ins_cost(150);
13391 format %{ "xorl $dst, $src\t# int" %}
13392 ins_encode %{
13393 __ xorl($dst$$Register, $src$$Address);
13394 %}
13395 ins_pipe(ialu_reg_mem);
13396 %}
13397
13398 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13399 %{
13400 predicate(UseAPX);
13401 match(Set dst (XorI src1 (LoadI src2)));
13402 effect(KILL cr);
13403 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13404
13405 ins_cost(150);
13406 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13407 ins_encode %{
13408 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13409 %}
13410 ins_pipe(ialu_reg_mem);
13411 %}
13412
13413 // Xor Memory with Register
13414 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13415 %{
13416 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13417 effect(KILL cr);
13418 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13419
13420 ins_cost(150);
13421 format %{ "xorb $dst, $src\t# byte" %}
13422 ins_encode %{
13423 __ xorb($dst$$Address, $src$$Register);
13424 %}
13425 ins_pipe(ialu_mem_reg);
13426 %}
13427
13428 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13429 %{
13430 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13431 effect(KILL cr);
13432 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13433
13434 ins_cost(150);
13435 format %{ "xorl $dst, $src\t# int" %}
13436 ins_encode %{
13437 __ xorl($dst$$Address, $src$$Register);
13438 %}
13439 ins_pipe(ialu_mem_reg);
13440 %}
13441
13442 // Xor Memory with Immediate
13443 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13444 %{
13445 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13446 effect(KILL cr);
13447 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13448
13449 ins_cost(125);
13450 format %{ "xorl $dst, $src\t# int" %}
13451 ins_encode %{
13452 __ xorl($dst$$Address, $src$$constant);
13453 %}
13454 ins_pipe(ialu_mem_imm);
13455 %}
13456
13457
13458 // Long Logical Instructions
13459
13460 // And Instructions
13461 // And Register with Register
13462 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13463 %{
13464 predicate(!UseAPX);
13465 match(Set dst (AndL dst src));
13466 effect(KILL cr);
13467 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13468
13469 format %{ "andq $dst, $src\t# long" %}
13470 ins_encode %{
13471 __ andq($dst$$Register, $src$$Register);
13472 %}
13473 ins_pipe(ialu_reg_reg);
13474 %}
13475
13476 // And Register with Register using New Data Destination (NDD)
13477 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13478 %{
13479 predicate(UseAPX);
13480 match(Set dst (AndL src1 src2));
13481 effect(KILL cr);
13482 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13483
13484 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13485 ins_encode %{
__ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13489 ins_pipe(ialu_reg_reg);
13490 %}
13491
13492 // And Register with Immediate 255
13493 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13494 %{
13495 match(Set dst (AndL src mask));
13496
13497 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13498 ins_encode %{
// movzbl zeroes out the upper 32 bits and does not need REX.W
13500 __ movzbl($dst$$Register, $src$$Register);
13501 %}
13502 ins_pipe(ialu_reg);
13503 %}
13504
13505 // And Register with Immediate 65535
13506 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13507 %{
13508 match(Set dst (AndL src mask));
13509
13510 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13511 ins_encode %{
// movzwl zeroes out the upper 32 bits and does not need REX.W
13513 __ movzwl($dst$$Register, $src$$Register);
13514 %}
13515 ins_pipe(ialu_reg);
13516 %}
13517
13518 // And Register with Immediate
13519 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13520 %{
13521 predicate(!UseAPX);
13522 match(Set dst (AndL dst src));
13523 effect(KILL cr);
13524 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13525
13526 format %{ "andq $dst, $src\t# long" %}
13527 ins_encode %{
13528 __ andq($dst$$Register, $src$$constant);
13529 %}
13530 ins_pipe(ialu_reg);
13531 %}
13532
13533 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13534 %{
13535 predicate(UseAPX);
13536 match(Set dst (AndL src1 src2));
13537 effect(KILL cr);
13538 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13539
13540 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13541 ins_encode %{
13542 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13543 %}
13544 ins_pipe(ialu_reg);
13545 %}
13546
13547 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13548 %{
13549 predicate(UseAPX);
13550 match(Set dst (AndL (LoadL src1) src2));
13551 effect(KILL cr);
13552 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13553
13554 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13555 ins_encode %{
13556 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13557 %}
13558 ins_pipe(ialu_reg);
13559 %}
13560
13561 // And Register with Memory
13562 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13563 %{
13564 predicate(!UseAPX);
13565 match(Set dst (AndL dst (LoadL src)));
13566 effect(KILL cr);
13567 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13568
13569 ins_cost(150);
13570 format %{ "andq $dst, $src\t# long" %}
13571 ins_encode %{
13572 __ andq($dst$$Register, $src$$Address);
13573 %}
13574 ins_pipe(ialu_reg_mem);
13575 %}
13576
13577 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13578 %{
13579 predicate(UseAPX);
13580 match(Set dst (AndL src1 (LoadL src2)));
13581 effect(KILL cr);
13582 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13583
13584 ins_cost(150);
13585 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13586 ins_encode %{
13587 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13588 %}
13589 ins_pipe(ialu_reg_mem);
13590 %}
13591
13592 // And Memory with Register
13593 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13594 %{
13595 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13596 effect(KILL cr);
13597 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13598
13599 ins_cost(150);
13600 format %{ "andq $dst, $src\t# long" %}
13601 ins_encode %{
13602 __ andq($dst$$Address, $src$$Register);
13603 %}
13604 ins_pipe(ialu_mem_reg);
13605 %}
13606
13607 // And Memory with Immediate
13608 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13609 %{
13610 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13611 effect(KILL cr);
13612 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13613
13614 ins_cost(125);
13615 format %{ "andq $dst, $src\t# long" %}
13616 ins_encode %{
13617 __ andq($dst$$Address, $src$$constant);
13618 %}
13619 ins_pipe(ialu_mem_imm);
13620 %}
13621
13622 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13623 %{
// con must be a pure 64-bit immediate whose complement not(con) is a power
// of 2, i.e. the single cleared bit sits above bit 30; for smaller constants
// a plain AND with a 32-bit immediate is just as cheap.
13626 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13627
13628 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13629 effect(KILL cr);
13630
13631 ins_cost(125);
13632 format %{ "btrq $dst, log2(not($con))\t# long" %}
13633 ins_encode %{
13634 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13635 %}
13636 ins_pipe(ialu_mem_imm);
13637 %}
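// When not(con) has a single set bit, "and with con" clears exactly one bit,
// and BTR with that bit's index does the same without materializing the
// 64-bit immediate. A minimal C++ sketch (illustrative only; helper made up):
//
//   #include <cstdint>
//   static inline uint64_t btr64_ref(uint64_t x, uint32_t bit) {
//     return x & ~(uint64_t{1} << (bit & 63));
//   }
//   // e.g. con == ~(uint64_t{1} << 40): x & con == btr64_ref(x, 40)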
13638
13639 // BMI1 instructions
13640 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13641 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13642 predicate(UseBMI1Instructions);
13643 effect(KILL cr);
13644 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13645
13646 ins_cost(125);
13647 format %{ "andnq $dst, $src1, $src2" %}
13648
13649 ins_encode %{
13650 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13651 %}
13652 ins_pipe(ialu_reg_mem);
13653 %}
13654
13655 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13656 match(Set dst (AndL (XorL src1 minus_1) src2));
13657 predicate(UseBMI1Instructions);
13658 effect(KILL cr);
13659 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13660
13661 format %{ "andnq $dst, $src1, $src2" %}
13662
13663 ins_encode %{
13664 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13665 %}
ins_pipe(ialu_reg);
13667 %}
13668
13669 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13670 match(Set dst (AndL (SubL imm_zero src) src));
13671 predicate(UseBMI1Instructions);
13672 effect(KILL cr);
13673 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13674
13675 format %{ "blsiq $dst, $src" %}
13676
13677 ins_encode %{
13678 __ blsiq($dst$$Register, $src$$Register);
13679 %}
13680 ins_pipe(ialu_reg);
13681 %}
13682
13683 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13684 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13685 predicate(UseBMI1Instructions);
13686 effect(KILL cr);
13687 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13688
13689 ins_cost(125);
13690 format %{ "blsiq $dst, $src" %}
13691
13692 ins_encode %{
13693 __ blsiq($dst$$Register, $src$$Address);
13694 %}
13695 ins_pipe(ialu_reg_mem);
13696 %}
13697
13698 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13699 %{
13700 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13701 predicate(UseBMI1Instructions);
13702 effect(KILL cr);
13703 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13704
13705 ins_cost(125);
13706 format %{ "blsmskq $dst, $src" %}
13707
13708 ins_encode %{
13709 __ blsmskq($dst$$Register, $src$$Address);
13710 %}
13711 ins_pipe(ialu_reg_mem);
13712 %}
13713
13714 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13715 %{
13716 match(Set dst (XorL (AddL src minus_1) src));
13717 predicate(UseBMI1Instructions);
13718 effect(KILL cr);
13719 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13720
13721 format %{ "blsmskq $dst, $src" %}
13722
13723 ins_encode %{
13724 __ blsmskq($dst$$Register, $src$$Register);
13725 %}
13726
13727 ins_pipe(ialu_reg);
13728 %}
13729
13730 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13731 %{
13732 match(Set dst (AndL (AddL src minus_1) src) );
13733 predicate(UseBMI1Instructions);
13734 effect(KILL cr);
13735 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13736
13737 format %{ "blsrq $dst, $src" %}
13738
13739 ins_encode %{
13740 __ blsrq($dst$$Register, $src$$Register);
13741 %}
13742
13743 ins_pipe(ialu_reg);
13744 %}
13745
13746 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13747 %{
13748 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13749 predicate(UseBMI1Instructions);
13750 effect(KILL cr);
13751 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13752
13753 ins_cost(125);
13754 format %{ "blsrq $dst, $src" %}
13755
13756 ins_encode %{
13757 __ blsrq($dst$$Register, $src$$Address);
13758 %}
13759
ins_pipe(ialu_reg_mem);
13761 %}
13762
13763 // Or Instructions
13764 // Or Register with Register
13765 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13766 %{
13767 predicate(!UseAPX);
13768 match(Set dst (OrL dst src));
13769 effect(KILL cr);
13770 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13771
13772 format %{ "orq $dst, $src\t# long" %}
13773 ins_encode %{
13774 __ orq($dst$$Register, $src$$Register);
13775 %}
13776 ins_pipe(ialu_reg_reg);
13777 %}
13778
13779 // Or Register with Register using New Data Destination (NDD)
13780 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13781 %{
13782 predicate(UseAPX);
13783 match(Set dst (OrL src1 src2));
13784 effect(KILL cr);
13785 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786
13787 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13788 ins_encode %{
__ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13792 ins_pipe(ialu_reg_reg);
13793 %}
13794
13795 // Use any_RegP to match R15 (TLS register) without spilling.
13796 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
predicate(!UseAPX);
match(Set dst (OrL dst (CastP2X src)));
13798 effect(KILL cr);
13799 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13800
13801 format %{ "orq $dst, $src\t# long" %}
13802 ins_encode %{
13803 __ orq($dst$$Register, $src$$Register);
13804 %}
13805 ins_pipe(ialu_reg_reg);
13806 %}
13807
13808 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13809 match(Set dst (OrL src1 (CastP2X src2)));
13810 effect(KILL cr);
13811 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13812
13813 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13814 ins_encode %{
13815 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13816 %}
13817 ins_pipe(ialu_reg_reg);
13818 %}
13819
13820 // Or Register with Immediate
13821 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13822 %{
13823 predicate(!UseAPX);
13824 match(Set dst (OrL dst src));
13825 effect(KILL cr);
13826 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13827
13828 format %{ "orq $dst, $src\t# long" %}
13829 ins_encode %{
13830 __ orq($dst$$Register, $src$$constant);
13831 %}
13832 ins_pipe(ialu_reg);
13833 %}
13834
13835 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13836 %{
13837 predicate(UseAPX);
13838 match(Set dst (OrL src1 src2));
13839 effect(KILL cr);
13840 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13841
13842 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13843 ins_encode %{
13844 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13845 %}
13846 ins_pipe(ialu_reg);
13847 %}
13848
13849 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13850 %{
13851 predicate(UseAPX);
13852 match(Set dst (OrL src1 src2));
13853 effect(KILL cr);
13854 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13855
13856 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13857 ins_encode %{
13858 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13859 %}
13860 ins_pipe(ialu_reg);
13861 %}
13862
13863 // Or Memory with Immediate into Register (NDD)
13864 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13865 %{
13866 predicate(UseAPX);
13867 match(Set dst (OrL (LoadL src1) src2));
13868 effect(KILL cr);
13869 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13870
13871 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13872 ins_encode %{
13873 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13874 %}
13875 ins_pipe(ialu_reg);
13876 %}
13877
13878 // Or Register with Memory
13879 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13880 %{
13881 predicate(!UseAPX);
13882 match(Set dst (OrL dst (LoadL src)));
13883 effect(KILL cr);
13884 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13885
13886 ins_cost(150);
13887 format %{ "orq $dst, $src\t# long" %}
13888 ins_encode %{
13889 __ orq($dst$$Register, $src$$Address);
13890 %}
13891 ins_pipe(ialu_reg_mem);
13892 %}
13893
13894 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13895 %{
13896 predicate(UseAPX);
13897 match(Set dst (OrL src1 (LoadL src2)));
13898 effect(KILL cr);
13899 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13900
13901 ins_cost(150);
13902 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13903 ins_encode %{
13904 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13905 %}
13906 ins_pipe(ialu_reg_mem);
13907 %}
13908
13909 // Or Memory with Register
13910 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13911 %{
13912 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13913 effect(KILL cr);
13914 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13915
13916 ins_cost(150);
13917 format %{ "orq $dst, $src\t# long" %}
13918 ins_encode %{
13919 __ orq($dst$$Address, $src$$Register);
13920 %}
13921 ins_pipe(ialu_mem_reg);
13922 %}
13923
13924 // Or Memory with Immediate
13925 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13926 %{
13927 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13928 effect(KILL cr);
13929 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13930
13931 ins_cost(125);
13932 format %{ "orq $dst, $src\t# long" %}
13933 ins_encode %{
13934 __ orq($dst$$Address, $src$$constant);
13935 %}
13936 ins_pipe(ialu_mem_imm);
13937 %}
13938
13939 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13940 %{
13941 // con should be a pure 64-bit power-of-2 immediate
13942 // because AND/OR works well enough for 8/32-bit values.
13943 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13944
13945 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13946 effect(KILL cr);
13947
13948 ins_cost(125);
13949 format %{ "btsq $dst, log2($con)\t# long" %}
13950 ins_encode %{
13951 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13952 %}
13953 ins_pipe(ialu_mem_imm);
13954 %}
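// Note: orq only takes a sign-extended imm32, so a power-of-2 constant with
// its bit above position 31 (the predicate above) would otherwise need a
// scratch register; btsq encodes the bit index in a single imm8 instead,
// e.g. con = 1L << 40 is emitted as "btsq $dst, 40".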
13955
13956 // Xor Instructions
13957 // Xor Register with Register
13958 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13959 %{
13960 predicate(!UseAPX);
13961 match(Set dst (XorL dst src));
13962 effect(KILL cr);
13963 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13964
13965 format %{ "xorq $dst, $src\t# long" %}
13966 ins_encode %{
13967 __ xorq($dst$$Register, $src$$Register);
13968 %}
13969 ins_pipe(ialu_reg_reg);
13970 %}
13971
13972 // Xor Register with Register using New Data Destination (NDD)
13973 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13974 %{
13975 predicate(UseAPX);
13976 match(Set dst (XorL src1 src2));
13977 effect(KILL cr);
13978 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13979
13980 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
13981 ins_encode %{
13982 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13983 %}
13984 ins_pipe(ialu_reg_reg);
13985 %}
13986
13987 // Xor Register with Immediate -1
13988 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
13989 %{
13990 predicate(!UseAPX);
13991 match(Set dst (XorL dst imm));
13992
13993 format %{ "notq $dst" %}
13994 ins_encode %{
13995 __ notq($dst$$Register);
13996 %}
13997 ins_pipe(ialu_reg);
13998 %}
13999
14000 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14001 %{
14002 predicate(UseAPX);
14003 match(Set dst (XorL src imm));
14004
14005 format %{ "enotq $dst, $src" %}
14006 ins_encode %{
14007 __ enotq($dst$$Register, $src$$Register);
14008 %}
14009 ins_pipe(ialu_reg);
14010 %}
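// Note: xor with -1 is a bitwise NOT, and x86 not/enot leave the flags
// untouched, which is why the two rules above have no KILL cr effect.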
14011
14012 // Xor Register with Immediate
14013 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14014 %{
14015 // The strict predicate excludes -1 here so that xorL_rReg_im1 is always selected for it, regardless of cost.
14016 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14017 match(Set dst (XorL dst src));
14018 effect(KILL cr);
14019 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14020
14021 format %{ "xorq $dst, $src\t# long" %}
14022 ins_encode %{
14023 __ xorq($dst$$Register, $src$$constant);
14024 %}
14025 ins_pipe(ialu_reg);
14026 %}
14027
14028 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14029 %{
14030 // The strict predicate excludes -1 here so that xorL_rReg_im1_ndd is always selected for it, regardless of cost.
14031 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14032 match(Set dst (XorL src1 src2));
14033 effect(KILL cr);
14034 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14035
14036 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14037 ins_encode %{
14038 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14039 %}
14040 ins_pipe(ialu_reg);
14041 %}
14042
14043 // Xor Memory with Immediate into Register (NDD)
14044 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14045 %{
14046 predicate(UseAPX);
14047 match(Set dst (XorL (LoadL src1) src2));
14048 effect(KILL cr);
14049 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14050 ins_cost(150);
14051
14052 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14053 ins_encode %{
14054 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14055 %}
14056 ins_pipe(ialu_reg);
14057 %}
14058
14059 // Xor Register with Memory
14060 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14061 %{
14062 predicate(!UseAPX);
14063 match(Set dst (XorL dst (LoadL src)));
14064 effect(KILL cr);
14065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14066
14067 ins_cost(150);
14068 format %{ "xorq $dst, $src\t# long" %}
14069 ins_encode %{
14070 __ xorq($dst$$Register, $src$$Address);
14071 %}
14072 ins_pipe(ialu_reg_mem);
14073 %}
14074
14075 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14076 %{
14077 predicate(UseAPX);
14078 match(Set dst (XorL src1 (LoadL src2)));
14079 effect(KILL cr);
14080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081
14082 ins_cost(150);
14083 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14084 ins_encode %{
14085 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14086 %}
14087 ins_pipe(ialu_reg_mem);
14088 %}
14089
14090 // Xor Memory with Register
14091 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14092 %{
14093 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14094 effect(KILL cr);
14095 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14096
14097 ins_cost(150);
14098 format %{ "xorq $dst, $src\t# long" %}
14099 ins_encode %{
14100 __ xorq($dst$$Address, $src$$Register);
14101 %}
14102 ins_pipe(ialu_mem_reg);
14103 %}
14104
14105 // Xor Memory with Immediate
14106 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14107 %{
14108 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14109 effect(KILL cr);
14110 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14111
14112 ins_cost(125);
14113 format %{ "xorq $dst, $src\t# long" %}
14114 ins_encode %{
14115 __ xorq($dst$$Address, $src$$constant);
14116 %}
14117 ins_pipe(ialu_mem_imm);
14118 %}
14119
14120 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14121 %{
14122 match(Set dst (CmpLTMask p q));
14123 effect(KILL cr);
14124
14125 ins_cost(400);
14126 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14127 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14128 "negl $dst" %}
14129 ins_encode %{
14130 __ cmpl($p$$Register, $q$$Register);
14131 __ setcc(Assembler::less, $dst$$Register);
14132 __ negl($dst$$Register);
14133 %}
14134 ins_pipe(pipe_slow);
14135 %}
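// Note: setcc leaves 0/1 in $dst and the negl turns that into a 0/-1 mask,
// i.e. dst = (p < q) ? -1 : 0.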
14136
14137 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14138 %{
14139 match(Set dst (CmpLTMask dst zero));
14140 effect(KILL cr);
14141
14142 ins_cost(100);
14143 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14144 ins_encode %{
14145 __ sarl($dst$$Register, 31);
14146 %}
14147 ins_pipe(ialu_reg);
14148 %}
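// Note: the arithmetic right shift by 31 broadcasts the sign bit, giving
// dst = (dst < 0) ? -1 : 0 in a single instruction.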
14149
14150 // Better to save a register than to avoid a branch.
14151 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14152 %{
14153 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14154 effect(KILL cr);
14155 ins_cost(300);
14156 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14157 "jge done\n\t"
14158 "addl $p,$y\n"
14159 "done: " %}
14160 ins_encode %{
14161 Register Rp = $p$$Register;
14162 Register Rq = $q$$Register;
14163 Register Ry = $y$$Register;
14164 Label done;
14165 __ subl(Rp, Rq);
14166 __ jccb(Assembler::greaterEqual, done);
14167 __ addl(Rp, Ry);
14168 __ bind(done);
14169 %}
14170 ins_pipe(pipe_cmplt);
14171 %}
14172
14173 // Better to save a register than to avoid a branch.
14174 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14175 %{
14176 match(Set y (AndI (CmpLTMask p q) y));
14177 effect(KILL cr);
14178
14179 ins_cost(300);
14180
14181 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14182 "jlt done\n\t"
14183 "xorl $y, $y\n"
14184 "done: " %}
14185 ins_encode %{
14186 Register Rp = $p$$Register;
14187 Register Rq = $q$$Register;
14188 Register Ry = $y$$Register;
14189 Label done;
14190 __ cmpl(Rp, Rq);
14191 __ jccb(Assembler::less, done);
14192 __ xorl(Ry, Ry);
14193 __ bind(done);
14194 %}
14195 ins_pipe(pipe_cmplt);
14196 %}
14197
14198
14199 //---------- FP Instructions------------------------------------------------
14200
14201 // Really expensive, avoid
14202 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14203 %{
14204 match(Set cr (CmpF src1 src2));
14205
14206 ins_cost(500);
14207 format %{ "ucomiss $src1, $src2\n\t"
14208 "jnp,s exit\n\t"
14209 "pushfq\t# saw NaN, set CF\n\t"
14210 "andq [rsp], #0xffffff2b\n\t"
14211 "popfq\n"
14212 "exit:" %}
14213 ins_encode %{
14214 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14215 emit_cmpfp_fixup(masm);
14216 %}
14217 ins_pipe(pipe_slow);
14218 %}
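// Note on the fixup above: ucomiss sets ZF=PF=CF=1 for an unordered (NaN)
// operand. The pushfq/andq/popfq sequence keeps CF but clears ZF and PF
// (mask 0xffffff2b also clears SF/AF), so a NaN result reads as "below"
// under the unsigned condition codes used with rFlagsRegU.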
14219
14220 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14221 match(Set cr (CmpF src1 src2));
14222
14223 ins_cost(100);
14224 format %{ "ucomiss $src1, $src2" %}
14225 ins_encode %{
14226 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14227 %}
14228 ins_pipe(pipe_slow);
14229 %}
14230
14231 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14232 match(Set cr (CmpF src1 (LoadF src2)));
14233
14234 ins_cost(100);
14235 format %{ "ucomiss $src1, $src2" %}
14236 ins_encode %{
14237 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14238 %}
14239 ins_pipe(pipe_slow);
14240 %}
14241
14242 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14243 match(Set cr (CmpF src con));
14244 ins_cost(100);
14245 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14246 ins_encode %{
14247 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14248 %}
14249 ins_pipe(pipe_slow);
14250 %}
14251
14252 // Really expensive, avoid
14253 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14254 %{
14255 match(Set cr (CmpD src1 src2));
14256
14257 ins_cost(500);
14258 format %{ "ucomisd $src1, $src2\n\t"
14259 "jnp,s exit\n\t"
14260 "pushfq\t# saw NaN, set CF\n\t"
14261 "andq [rsp], #0xffffff2b\n\t"
14262 "popfq\n"
14263 "exit:" %}
14264 ins_encode %{
14265 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14266 emit_cmpfp_fixup(masm);
14267 %}
14268 ins_pipe(pipe_slow);
14269 %}
14270
14271 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14272 match(Set cr (CmpD src1 src2));
14273
14274 ins_cost(100);
14275 format %{ "ucomisd $src1, $src2 test" %}
14276 ins_encode %{
14277 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14278 %}
14279 ins_pipe(pipe_slow);
14280 %}
14281
14282 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14283 match(Set cr (CmpD src1 (LoadD src2)));
14284
14285 ins_cost(100);
14286 format %{ "ucomisd $src1, $src2" %}
14287 ins_encode %{
14288 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14289 %}
14290 ins_pipe(pipe_slow);
14291 %}
14292
14293 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14294 match(Set cr (CmpD src con));
14295 ins_cost(100);
14296 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14297 ins_encode %{
14298 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14299 %}
14300 ins_pipe(pipe_slow);
14301 %}
14302
14303 // Compare into -1,0,1
14304 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14305 %{
14306 match(Set dst (CmpF3 src1 src2));
14307 effect(KILL cr);
14308
14309 ins_cost(275);
14310 format %{ "ucomiss $src1, $src2\n\t"
14311 "movl $dst, #-1\n\t"
14312 "jp,s done\n\t"
14313 "jb,s done\n\t"
14314 "setne $dst\n\t"
14315 "movzbl $dst, $dst\n"
14316 "done:" %}
14317 ins_encode %{
14318 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14319 emit_cmpfp3(masm, $dst$$Register);
14320 %}
14321 ins_pipe(pipe_slow);
14322 %}
14323
14324 // Compare into -1,0,1
14325 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14326 %{
14327 match(Set dst (CmpF3 src1 (LoadF src2)));
14328 effect(KILL cr);
14329
14330 ins_cost(275);
14331 format %{ "ucomiss $src1, $src2\n\t"
14332 "movl $dst, #-1\n\t"
14333 "jp,s done\n\t"
14334 "jb,s done\n\t"
14335 "setne $dst\n\t"
14336 "movzbl $dst, $dst\n"
14337 "done:" %}
14338 ins_encode %{
14339 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14340 emit_cmpfp3(masm, $dst$$Register);
14341 %}
14342 ins_pipe(pipe_slow);
14343 %}
14344
14345 // Compare into -1,0,1
14346 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14347 match(Set dst (CmpF3 src con));
14348 effect(KILL cr);
14349
14350 ins_cost(275);
14351 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14352 "movl $dst, #-1\n\t"
14353 "jp,s done\n\t"
14354 "jb,s done\n\t"
14355 "setne $dst\n\t"
14356 "movzbl $dst, $dst\n"
14357 "done:" %}
14358 ins_encode %{
14359 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14360 emit_cmpfp3(masm, $dst$$Register);
14361 %}
14362 ins_pipe(pipe_slow);
14363 %}
14364
14365 // Compare into -1,0,1
14366 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14367 %{
14368 match(Set dst (CmpD3 src1 src2));
14369 effect(KILL cr);
14370
14371 ins_cost(275);
14372 format %{ "ucomisd $src1, $src2\n\t"
14373 "movl $dst, #-1\n\t"
14374 "jp,s done\n\t"
14375 "jb,s done\n\t"
14376 "setne $dst\n\t"
14377 "movzbl $dst, $dst\n"
14378 "done:" %}
14379 ins_encode %{
14380 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14381 emit_cmpfp3(masm, $dst$$Register);
14382 %}
14383 ins_pipe(pipe_slow);
14384 %}
14385
14386 // Compare into -1,0,1
14387 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14388 %{
14389 match(Set dst (CmpD3 src1 (LoadD src2)));
14390 effect(KILL cr);
14391
14392 ins_cost(275);
14393 format %{ "ucomisd $src1, $src2\n\t"
14394 "movl $dst, #-1\n\t"
14395 "jp,s done\n\t"
14396 "jb,s done\n\t"
14397 "setne $dst\n\t"
14398 "movzbl $dst, $dst\n"
14399 "done:" %}
14400 ins_encode %{
14401 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14402 emit_cmpfp3(masm, $dst$$Register);
14403 %}
14404 ins_pipe(pipe_slow);
14405 %}
14406
14407 // Compare into -1,0,1
14408 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14409 match(Set dst (CmpD3 src con));
14410 effect(KILL cr);
14411
14412 ins_cost(275);
14413 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14414 "movl $dst, #-1\n\t"
14415 "jp,s done\n\t"
14416 "jb,s done\n\t"
14417 "setne $dst\n\t"
14418 "movzbl $dst, $dst\n"
14419 "done:" %}
14420 ins_encode %{
14421 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14422 emit_cmpfp3(masm, $dst$$Register);
14423 %}
14424 ins_pipe(pipe_slow);
14425 %}
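// Note: emit_cmpfp3 implements the three-way compare; roughly, in C terms
// (illustration only):
//   dst = -1;                // less, or unordered (NaN)
//   if (!parity && !below)
//     dst = (src1 != src2);  // 0 if equal, 1 if greater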
14426
14427 //----------Arithmetic Conversion Instructions---------------------------------
14428
14429 instruct convF2D_reg_reg(regD dst, regF src)
14430 %{
14431 match(Set dst (ConvF2D src));
14432
14433 format %{ "cvtss2sd $dst, $src" %}
14434 ins_encode %{
14435 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14436 %}
14437 ins_pipe(pipe_slow); // XXX
14438 %}
14439
14440 instruct convF2D_reg_mem(regD dst, memory src)
14441 %{
14442 predicate(UseAVX == 0);
14443 match(Set dst (ConvF2D (LoadF src)));
14444
14445 format %{ "cvtss2sd $dst, $src" %}
14446 ins_encode %{
14447 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14448 %}
14449 ins_pipe(pipe_slow); // XXX
14450 %}
14451
14452 instruct convD2F_reg_reg(regF dst, regD src)
14453 %{
14454 match(Set dst (ConvD2F src));
14455
14456 format %{ "cvtsd2ss $dst, $src" %}
14457 ins_encode %{
14458 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14459 %}
14460 ins_pipe(pipe_slow); // XXX
14461 %}
14462
14463 instruct convD2F_reg_mem(regF dst, memory src)
14464 %{
14465 predicate(UseAVX == 0);
14466 match(Set dst (ConvD2F (LoadD src)));
14467
14468 format %{ "cvtsd2ss $dst, $src" %}
14469 ins_encode %{
14470 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14471 %}
14472 ins_pipe(pipe_slow); // XXX
14473 %}
14474
14475 // XXX do mem variants
14476 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14477 %{
14478 predicate(!VM_Version::supports_avx10_2());
14479 match(Set dst (ConvF2I src));
14480 effect(KILL cr);
14481 format %{ "convert_f2i $dst, $src" %}
14482 ins_encode %{
14483 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14484 %}
14485 ins_pipe(pipe_slow);
14486 %}
14487
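// The AVX10.2 rules below use the saturating converts, which produce the
// Java-mandated result directly (NaN becomes 0, out-of-range values
// saturate to MIN/MAX), so no flags kill and no post-conversion fixup via
// convertF2I is needed.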
14488 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14489 %{
14490 predicate(VM_Version::supports_avx10_2());
14491 match(Set dst (ConvF2I src));
14492 format %{ "evcvttss2sisl $dst, $src" %}
14493 ins_encode %{
14494 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14495 %}
14496 ins_pipe(pipe_slow);
14497 %}
14498
14499 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14500 %{
14501 predicate(VM_Version::supports_avx10_2());
14502 match(Set dst (ConvF2I (LoadF src)));
14503 format %{ "evcvttss2sisl $dst, $src" %}
14504 ins_encode %{
14505 __ evcvttss2sisl($dst$$Register, $src$$Address);
14506 %}
14507 ins_pipe(pipe_slow);
14508 %}
14509
14510 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14511 %{
14512 predicate(!VM_Version::supports_avx10_2());
14513 match(Set dst (ConvF2L src));
14514 effect(KILL cr);
14515 format %{ "convert_f2l $dst, $src"%}
14516 ins_encode %{
14517 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14518 %}
14519 ins_pipe(pipe_slow);
14520 %}
14521
14522 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14523 %{
14524 predicate(VM_Version::supports_avx10_2());
14525 match(Set dst (ConvF2L src));
14526 format %{ "evcvttss2sisq $dst, $src" %}
14527 ins_encode %{
14528 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14529 %}
14530 ins_pipe(pipe_slow);
14531 %}
14532
14533 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14534 %{
14535 predicate(VM_Version::supports_avx10_2());
14536 match(Set dst (ConvF2L (LoadF src)));
14537 format %{ "evcvttss2sisq $dst, $src" %}
14538 ins_encode %{
14539 __ evcvttss2sisq($dst$$Register, $src$$Address);
14540 %}
14541 ins_pipe(pipe_slow);
14542 %}
14543
14544 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14545 %{
14546 predicate(!VM_Version::supports_avx10_2());
14547 match(Set dst (ConvD2I src));
14548 effect(KILL cr);
14549 format %{ "convert_d2i $dst, $src"%}
14550 ins_encode %{
14551 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14552 %}
14553 ins_pipe(pipe_slow);
14554 %}
14555
14556 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14557 %{
14558 predicate(VM_Version::supports_avx10_2());
14559 match(Set dst (ConvD2I src));
14560 format %{ "evcvttsd2sisl $dst, $src" %}
14561 ins_encode %{
14562 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14563 %}
14564 ins_pipe(pipe_slow);
14565 %}
14566
14567 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14568 %{
14569 predicate(VM_Version::supports_avx10_2());
14570 match(Set dst (ConvD2I (LoadD src)));
14571 format %{ "evcvttsd2sisl $dst, $src" %}
14572 ins_encode %{
14573 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14574 %}
14575 ins_pipe(pipe_slow);
14576 %}
14577
14578 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14579 %{
14580 predicate(!VM_Version::supports_avx10_2());
14581 match(Set dst (ConvD2L src));
14582 effect(KILL cr);
14583 format %{ "convert_d2l $dst, $src"%}
14584 ins_encode %{
14585 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14586 %}
14587 ins_pipe(pipe_slow);
14588 %}
14589
14590 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14591 %{
14592 predicate(VM_Version::supports_avx10_2());
14593 match(Set dst (ConvD2L src));
14594 format %{ "evcvttsd2sisq $dst, $src" %}
14595 ins_encode %{
14596 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14597 %}
14598 ins_pipe(pipe_slow);
14599 %}
14600
14601 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14602 %{
14603 predicate(VM_Version::supports_avx10_2());
14604 match(Set dst (ConvD2L (LoadD src)));
14605 format %{ "evcvttsd2sisq $dst, $src" %}
14606 ins_encode %{
14607 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14608 %}
14609 ins_pipe(pipe_slow);
14610 %}
14611
14612 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14613 %{
14614 match(Set dst (RoundD src));
14615 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14616 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14617 ins_encode %{
14618 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14619 %}
14620 ins_pipe(pipe_slow);
14621 %}
14622
14623 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14624 %{
14625 match(Set dst (RoundF src));
14626 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14627 format %{ "round_float $dst,$src" %}
14628 ins_encode %{
14629 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14630 %}
14631 ins_pipe(pipe_slow);
14632 %}
14633
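// In the conversions below, cvtsi2ss/cvtsi2sd only merge into the low part
// of $dst, so under AVX the preceding pxor breaks the false dependence on
// the destination register's previous contents.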
14634 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14635 %{
14636 predicate(!UseXmmI2F);
14637 match(Set dst (ConvI2F src));
14638
14639 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14640 ins_encode %{
14641 if (UseAVX > 0) {
14642 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14643 }
14644 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14645 %}
14646 ins_pipe(pipe_slow); // XXX
14647 %}
14648
14649 instruct convI2F_reg_mem(regF dst, memory src)
14650 %{
14651 predicate(UseAVX == 0);
14652 match(Set dst (ConvI2F (LoadI src)));
14653
14654 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14655 ins_encode %{
14656 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14657 %}
14658 ins_pipe(pipe_slow); // XXX
14659 %}
14660
14661 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14662 %{
14663 predicate(!UseXmmI2D);
14664 match(Set dst (ConvI2D src));
14665
14666 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14667 ins_encode %{
14668 if (UseAVX > 0) {
14669 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14670 }
14671 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14672 %}
14673 ins_pipe(pipe_slow); // XXX
14674 %}
14675
14676 instruct convI2D_reg_mem(regD dst, memory src)
14677 %{
14678 predicate(UseAVX == 0);
14679 match(Set dst (ConvI2D (LoadI src)));
14680
14681 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14682 ins_encode %{
14683 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14684 %}
14685 ins_pipe(pipe_slow); // XXX
14686 %}
14687
14688 instruct convXI2F_reg(regF dst, rRegI src)
14689 %{
14690 predicate(UseXmmI2F);
14691 match(Set dst (ConvI2F src));
14692
14693 format %{ "movdl $dst, $src\n\t"
14694 "cvtdq2psl $dst, $dst\t# i2f" %}
14695 ins_encode %{
14696 __ movdl($dst$$XMMRegister, $src$$Register);
14697 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14698 %}
14699 ins_pipe(pipe_slow); // XXX
14700 %}
14701
14702 instruct convXI2D_reg(regD dst, rRegI src)
14703 %{
14704 predicate(UseXmmI2D);
14705 match(Set dst (ConvI2D src));
14706
14707 format %{ "movdl $dst, $src\n\t"
14708 "cvtdq2pdl $dst, $dst\t# i2d" %}
14709 ins_encode %{
14710 __ movdl($dst$$XMMRegister, $src$$Register);
14711 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14712 %}
14713 ins_pipe(pipe_slow); // XXX
14714 %}
14715
14716 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14717 %{
14718 match(Set dst (ConvL2F src));
14719
14720 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14721 ins_encode %{
14722 if (UseAVX > 0) {
14723 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14724 }
14725 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14726 %}
14727 ins_pipe(pipe_slow); // XXX
14728 %}
14729
14730 instruct convL2F_reg_mem(regF dst, memory src)
14731 %{
14732 predicate(UseAVX == 0);
14733 match(Set dst (ConvL2F (LoadL src)));
14734
14735 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14736 ins_encode %{
14737 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14738 %}
14739 ins_pipe(pipe_slow); // XXX
14740 %}
14741
14742 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14743 %{
14744 match(Set dst (ConvL2D src));
14745
14746 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14747 ins_encode %{
14748 if (UseAVX > 0) {
14749 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14750 }
14751 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14752 %}
14753 ins_pipe(pipe_slow); // XXX
14754 %}
14755
14756 instruct convL2D_reg_mem(regD dst, memory src)
14757 %{
14758 predicate(UseAVX == 0);
14759 match(Set dst (ConvL2D (LoadL src)));
14760
14761 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14762 ins_encode %{
14763 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14764 %}
14765 ins_pipe(pipe_slow); // XXX
14766 %}
14767
14768 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14769 %{
14770 match(Set dst (ConvI2L src));
14771
14772 ins_cost(125);
14773 format %{ "movslq $dst, $src\t# i2l" %}
14774 ins_encode %{
14775 __ movslq($dst$$Register, $src$$Register);
14776 %}
14777 ins_pipe(ialu_reg_reg);
14778 %}
14779
14780 // Zero-extend convert int to long
14781 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14782 %{
14783 match(Set dst (AndL (ConvI2L src) mask));
14784
14785 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14786 ins_encode %{
14787 if ($dst$$reg != $src$$reg) {
14788 __ movl($dst$$Register, $src$$Register);
14789 }
14790 %}
14791 ins_pipe(ialu_reg_reg);
14792 %}
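// Note: on x86-64 every write to a 32-bit register zero-extends into the
// upper half, so when dst == src the int value is already zero-extended
// and no instruction needs to be emitted.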
14793
14794 // Zero-extend convert int to long
14795 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14796 %{
14797 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14798
14799 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14800 ins_encode %{
14801 __ movl($dst$$Register, $src$$Address);
14802 %}
14803 ins_pipe(ialu_reg_mem);
14804 %}
14805
14806 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14807 %{
14808 match(Set dst (AndL src mask));
14809
14810 format %{ "movl $dst, $src\t# zero-extend long" %}
14811 ins_encode %{
14812 __ movl($dst$$Register, $src$$Register);
14813 %}
14814 ins_pipe(ialu_reg_reg);
14815 %}
14816
14817 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14818 %{
14819 match(Set dst (ConvL2I src));
14820
14821 format %{ "movl $dst, $src\t# l2i" %}
14822 ins_encode %{
14823 __ movl($dst$$Register, $src$$Register);
14824 %}
14825 ins_pipe(ialu_reg_reg);
14826 %}
14827
14828
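// The Move*2* rules below reinterpret raw bits between the integer and
// floating-point domains (as used by Float.floatToRawIntBits and friends):
// the stack variants move the bits through a spill slot, the register
// variants use movd/movdq directly between general and XMM registers.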
14829 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14830 match(Set dst (MoveF2I src));
14831 effect(DEF dst, USE src);
14832
14833 ins_cost(125);
14834 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14835 ins_encode %{
14836 __ movl($dst$$Register, Address(rsp, $src$$disp));
14837 %}
14838 ins_pipe(ialu_reg_mem);
14839 %}
14840
14841 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14842 match(Set dst (MoveI2F src));
14843 effect(DEF dst, USE src);
14844
14845 ins_cost(125);
14846 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14847 ins_encode %{
14848 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14849 %}
14850 ins_pipe(pipe_slow);
14851 %}
14852
14853 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14854 match(Set dst (MoveD2L src));
14855 effect(DEF dst, USE src);
14856
14857 ins_cost(125);
14858 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14859 ins_encode %{
14860 __ movq($dst$$Register, Address(rsp, $src$$disp));
14861 %}
14862 ins_pipe(ialu_reg_mem);
14863 %}
14864
14865 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14866 predicate(!UseXmmLoadAndClearUpper);
14867 match(Set dst (MoveL2D src));
14868 effect(DEF dst, USE src);
14869
14870 ins_cost(125);
14871 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14872 ins_encode %{
14873 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14874 %}
14875 ins_pipe(pipe_slow);
14876 %}
14877
14878 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14879 predicate(UseXmmLoadAndClearUpper);
14880 match(Set dst (MoveL2D src));
14881 effect(DEF dst, USE src);
14882
14883 ins_cost(125);
14884 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14885 ins_encode %{
14886 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14887 %}
14888 ins_pipe(pipe_slow);
14889 %}
14890
14891
14892 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14893 match(Set dst (MoveF2I src));
14894 effect(DEF dst, USE src);
14895
14896 ins_cost(95); // XXX
14897 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14898 ins_encode %{
14899 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14900 %}
14901 ins_pipe(pipe_slow);
14902 %}
14903
14904 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14905 match(Set dst (MoveI2F src));
14906 effect(DEF dst, USE src);
14907
14908 ins_cost(100);
14909 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
14910 ins_encode %{
14911 __ movl(Address(rsp, $dst$$disp), $src$$Register);
14912 %}
14913 ins_pipe( ialu_mem_reg );
14914 %}
14915
14916 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14917 match(Set dst (MoveD2L src));
14918 effect(DEF dst, USE src);
14919
14920 ins_cost(95); // XXX
14921 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
14922 ins_encode %{
14923 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14924 %}
14925 ins_pipe(pipe_slow);
14926 %}
14927
14928 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14929 match(Set dst (MoveL2D src));
14930 effect(DEF dst, USE src);
14931
14932 ins_cost(100);
14933 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
14934 ins_encode %{
14935 __ movq(Address(rsp, $dst$$disp), $src$$Register);
14936 %}
14937 ins_pipe(ialu_mem_reg);
14938 %}
14939
14940 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14941 match(Set dst (MoveF2I src));
14942 effect(DEF dst, USE src);
14943 ins_cost(85);
14944 format %{ "movd $dst,$src\t# MoveF2I" %}
14945 ins_encode %{
14946 __ movdl($dst$$Register, $src$$XMMRegister);
14947 %}
14948 ins_pipe( pipe_slow );
14949 %}
14950
14951 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14952 match(Set dst (MoveD2L src));
14953 effect(DEF dst, USE src);
14954 ins_cost(85);
14955 format %{ "movd $dst,$src\t# MoveD2L" %}
14956 ins_encode %{
14957 __ movdq($dst$$Register, $src$$XMMRegister);
14958 %}
14959 ins_pipe( pipe_slow );
14960 %}
14961
14962 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14963 match(Set dst (MoveI2F src));
14964 effect(DEF dst, USE src);
14965 ins_cost(100);
14966 format %{ "movd $dst,$src\t# MoveI2F" %}
14967 ins_encode %{
14968 __ movdl($dst$$XMMRegister, $src$$Register);
14969 %}
14970 ins_pipe( pipe_slow );
14971 %}
14972
14973 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14974 match(Set dst (MoveL2D src));
14975 effect(DEF dst, USE src);
14976 ins_cost(100);
14977 format %{ "movd $dst,$src\t# MoveL2D" %}
14978 ins_encode %{
14979 __ movdq($dst$$XMMRegister, $src$$Register);
14980 %}
14981 ins_pipe( pipe_slow );
14982 %}
14983
14984 // Fast clearing of an array
14985 // Small non-constant length ClearArray for non-AVX512 targets.
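// The count is in 8-byte words (hence the "shlq rcx,3" conversion to bytes
// in the template below). Short lengths fall through to an 8-byte store
// loop; large ones use rep stosb, 32-byte XMM stores, or rep stosq,
// depending on UseFastStosb and UseXMMForObjInit.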
14986 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14987 Universe dummy, rFlagsReg cr)
14988 %{
14989 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14990 match(Set dummy (ClearArray cnt base));
14991 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14992
14993 format %{ $$template
14994 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
14995 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
14996 $$emit$$"jg LARGE\n\t"
14997 $$emit$$"dec rcx\n\t"
14998 $$emit$$"js DONE\t# Zero length\n\t"
14999 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15000 $$emit$$"dec rcx\n\t"
15001 $$emit$$"jge LOOP\n\t"
15002 $$emit$$"jmp DONE\n\t"
15003 $$emit$$"# LARGE:\n\t"
15004 if (UseFastStosb) {
15005 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15006 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15007 } else if (UseXMMForObjInit) {
15008 $$emit$$"mov rdi,rax\n\t"
15009 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15010 $$emit$$"jmpq L_zero_64_bytes\n\t"
15011 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15012 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15013 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15014 $$emit$$"add 0x40,rax\n\t"
15015 $$emit$$"# L_zero_64_bytes:\n\t"
15016 $$emit$$"sub 0x8,rcx\n\t"
15017 $$emit$$"jge L_loop\n\t"
15018 $$emit$$"add 0x4,rcx\n\t"
15019 $$emit$$"jl L_tail\n\t"
15020 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15021 $$emit$$"add 0x20,rax\n\t"
15022 $$emit$$"sub 0x4,rcx\n\t"
15023 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15024 $$emit$$"add 0x4,rcx\n\t"
15025 $$emit$$"jle L_end\n\t"
15026 $$emit$$"dec rcx\n\t"
15027 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15028 $$emit$$"vmovq xmm0,(rax)\n\t"
15029 $$emit$$"add 0x8,rax\n\t"
15030 $$emit$$"dec rcx\n\t"
15031 $$emit$$"jge L_sloop\n\t"
15032 $$emit$$"# L_end:\n\t"
15033 } else {
15034 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15035 }
15036 $$emit$$"# DONE"
15037 %}
15038 ins_encode %{
15039 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15040 $tmp$$XMMRegister, false, knoreg);
15041 %}
15042 ins_pipe(pipe_slow);
15043 %}
15044
15045 // Small non-constant length ClearArray for AVX512 targets.
15046 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15047 Universe dummy, rFlagsReg cr)
15048 %{
15049 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15050 match(Set dummy (ClearArray cnt base));
15051 ins_cost(125);
15052 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15053
15054 format %{ $$template
15055 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15056 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15057 $$emit$$"jg LARGE\n\t"
15058 $$emit$$"dec rcx\n\t"
15059 $$emit$$"js DONE\t# Zero length\n\t"
15060 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15061 $$emit$$"dec rcx\n\t"
15062 $$emit$$"jge LOOP\n\t"
15063 $$emit$$"jmp DONE\n\t"
15064 $$emit$$"# LARGE:\n\t"
15065 if (UseFastStosb) {
15066 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15067 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15068 } else if (UseXMMForObjInit) {
15069 $$emit$$"mov rdi,rax\n\t"
15070 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15071 $$emit$$"jmpq L_zero_64_bytes\n\t"
15072 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15073 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15074 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15075 $$emit$$"add 0x40,rax\n\t"
15076 $$emit$$"# L_zero_64_bytes:\n\t"
15077 $$emit$$"sub 0x8,rcx\n\t"
15078 $$emit$$"jge L_loop\n\t"
15079 $$emit$$"add 0x4,rcx\n\t"
15080 $$emit$$"jl L_tail\n\t"
15081 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15082 $$emit$$"add 0x20,rax\n\t"
15083 $$emit$$"sub 0x4,rcx\n\t"
15084 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15085 $$emit$$"add 0x4,rcx\n\t"
15086 $$emit$$"jle L_end\n\t"
15087 $$emit$$"dec rcx\n\t"
15088 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15089 $$emit$$"vmovq xmm0,(rax)\n\t"
15090 $$emit$$"add 0x8,rax\n\t"
15091 $$emit$$"dec rcx\n\t"
15092 $$emit$$"jge L_sloop\n\t"
15093 $$emit$$"# L_end:\n\t"
15094 } else {
15095 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15096 }
15097 $$emit$$"# DONE"
15098 %}
15099 ins_encode %{
15100 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15101 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15102 %}
15103 ins_pipe(pipe_slow);
15104 %}
15105
15106 // Large non-constant length ClearArray for non-AVX512 targets.
15107 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15108 Universe dummy, rFlagsReg cr)
15109 %{
15110 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15111 match(Set dummy (ClearArray cnt base));
15112 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15113
15114 format %{ $$template
15115 if (UseFastStosb) {
15116 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15117 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15118 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15119 } else if (UseXMMForObjInit) {
15120 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15121 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15122 $$emit$$"jmpq L_zero_64_bytes\n\t"
15123 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15124 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15125 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15126 $$emit$$"add 0x40,rax\n\t"
15127 $$emit$$"# L_zero_64_bytes:\n\t"
15128 $$emit$$"sub 0x8,rcx\n\t"
15129 $$emit$$"jge L_loop\n\t"
15130 $$emit$$"add 0x4,rcx\n\t"
15131 $$emit$$"jl L_tail\n\t"
15132 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15133 $$emit$$"add 0x20,rax\n\t"
15134 $$emit$$"sub 0x4,rcx\n\t"
15135 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15136 $$emit$$"add 0x4,rcx\n\t"
15137 $$emit$$"jle L_end\n\t"
15138 $$emit$$"dec rcx\n\t"
15139 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15140 $$emit$$"vmovq xmm0,(rax)\n\t"
15141 $$emit$$"add 0x8,rax\n\t"
15142 $$emit$$"dec rcx\n\t"
15143 $$emit$$"jge L_sloop\n\t"
15144 $$emit$$"# L_end:\n\t"
15145 } else {
15146 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15147 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15148 }
15149 %}
15150 ins_encode %{
15151 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15152 $tmp$$XMMRegister, true, knoreg);
15153 %}
15154 ins_pipe(pipe_slow);
15155 %}
15156
15157 // Large non-constant length ClearArray for AVX512 targets.
15158 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15159 Universe dummy, rFlagsReg cr)
15160 %{
15161 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15162 match(Set dummy (ClearArray cnt base));
15163 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15164
15165 format %{ $$template
15166 if (UseFastStosb) {
15167 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15168 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15169 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15170 } else if (UseXMMForObjInit) {
15171 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15172 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15173 $$emit$$"jmpq L_zero_64_bytes\n\t"
15174 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15175 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15176 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15177 $$emit$$"add 0x40,rax\n\t"
15178 $$emit$$"# L_zero_64_bytes:\n\t"
15179 $$emit$$"sub 0x8,rcx\n\t"
15180 $$emit$$"jge L_loop\n\t"
15181 $$emit$$"add 0x4,rcx\n\t"
15182 $$emit$$"jl L_tail\n\t"
15183 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184 $$emit$$"add 0x20,rax\n\t"
15185 $$emit$$"sub 0x4,rcx\n\t"
15186 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187 $$emit$$"add 0x4,rcx\n\t"
15188 $$emit$$"jle L_end\n\t"
15189 $$emit$$"dec rcx\n\t"
15190 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191 $$emit$$"vmovq xmm0,(rax)\n\t"
15192 $$emit$$"add 0x8,rax\n\t"
15193 $$emit$$"dec rcx\n\t"
15194 $$emit$$"jge L_sloop\n\t"
15195 $$emit$$"# L_end:\n\t"
15196 } else {
15197 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15198 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15199 }
15200 %}
15201 ins_encode %{
15202 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15204 %}
15205 ins_pipe(pipe_slow);
15206 %}
15207
15208 // Small constant length ClearArray for AVX512 targets.
15209 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15210 %{
15211 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15212 match(Set dummy (ClearArray cnt base));
15213 ins_cost(100);
15214 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15215 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15216 ins_encode %{
15217 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15218 %}
15219 ins_pipe(pipe_slow);
15220 %}
15221
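// The string compare rules come in pairs: the _evex variants require
// avx512vlbw and pass an opmask register ($ktmp) down to string_compare,
// while the plain variants pass knoreg. Note that the UL variants swap the
// two strings when calling string_compare.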
15222 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15223 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15224 %{
15225 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15226 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15227 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15228
15229 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15230 ins_encode %{
15231 __ string_compare($str1$$Register, $str2$$Register,
15232 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15233 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15234 %}
15235 ins_pipe( pipe_slow );
15236 %}
15237
15238 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15239 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15240 %{
15241 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15242 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15243 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15244
15245 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15246 ins_encode %{
15247 __ string_compare($str1$$Register, $str2$$Register,
15248 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15249 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15250 %}
15251 ins_pipe( pipe_slow );
15252 %}
15253
15254 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15255 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15256 %{
15257 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15258 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15259 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15260
15261 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15262 ins_encode %{
15263 __ string_compare($str1$$Register, $str2$$Register,
15264 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15265 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15266 %}
15267 ins_pipe( pipe_slow );
15268 %}
15269
15270 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15271 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15272 %{
15273 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15274 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15275 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15276
15277 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15278 ins_encode %{
15279 __ string_compare($str1$$Register, $str2$$Register,
15280 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15281 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15282 %}
15283 ins_pipe( pipe_slow );
15284 %}
15285
15286 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15287 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15288 %{
15289 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15290 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15291 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15292
15293 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15294 ins_encode %{
15295 __ string_compare($str1$$Register, $str2$$Register,
15296 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15297 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15298 %}
15299 ins_pipe( pipe_slow );
15300 %}
15301
15302 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15303 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15304 %{
15305 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15306 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15307 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15308
15309 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15310 ins_encode %{
15311 __ string_compare($str1$$Register, $str2$$Register,
15312 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15313 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15314 %}
15315 ins_pipe( pipe_slow );
15316 %}
15317
15318 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15319 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15320 %{
15321 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15322 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15323 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15324
15325 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15326 ins_encode %{
15327 __ string_compare($str2$$Register, $str1$$Register,
15328 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15329 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15330 %}
15331 ins_pipe( pipe_slow );
15332 %}
15333
15334 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15335 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15336 %{
15337 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15338 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15339 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15340
15341 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15342 ins_encode %{
15343 __ string_compare($str2$$Register, $str1$$Register,
15344 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15345 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15346 %}
15347 ins_pipe( pipe_slow );
15348 %}
15349
15350 // fast search of substring with known size.
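// A 16-byte XMM register holds 16 Latin-1 bytes or 8 UTF-16 chars, hence
// the constant-size cutoffs of 16 (LL) and 8 (UU/UL) below for keeping the
// whole substring in a register.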
15351 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15352 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15353 %{
15354 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15355 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15356 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15357
15358 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15359 ins_encode %{
15360 int icnt2 = (int)$int_cnt2$$constant;
15361 if (icnt2 >= 16) {
15362 // IndexOf for constant substrings with size >= 16 elements
15363 // which don't need to be loaded through stack.
15364 __ string_indexofC8($str1$$Register, $str2$$Register,
15365 $cnt1$$Register, $cnt2$$Register,
15366 icnt2, $result$$Register,
15367 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15368 } else {
15369 // Small strings are loaded through stack if they cross page boundary.
15370 __ string_indexof($str1$$Register, $str2$$Register,
15371 $cnt1$$Register, $cnt2$$Register,
15372 icnt2, $result$$Register,
15373 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15374 }
15375 %}
15376 ins_pipe( pipe_slow );
15377 %}
15378
15379 // fast search of substring with known size.
15380 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15381 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15382 %{
15383 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15384 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15385 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15386
15387 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15388 ins_encode %{
15389 int icnt2 = (int)$int_cnt2$$constant;
15390 if (icnt2 >= 8) {
15391 // IndexOf for constant substrings with size >= 8 elements
15392 // which don't need to be loaded through stack.
15393 __ string_indexofC8($str1$$Register, $str2$$Register,
15394 $cnt1$$Register, $cnt2$$Register,
15395 icnt2, $result$$Register,
15396 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15397 } else {
15398 // Small strings are loaded through stack if they cross page boundary.
15399 __ string_indexof($str1$$Register, $str2$$Register,
15400 $cnt1$$Register, $cnt2$$Register,
15401 icnt2, $result$$Register,
15402 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15403 }
15404 %}
15405 ins_pipe( pipe_slow );
15406 %}
15407
15408 // fast search of substring with known size.
15409 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15410 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15411 %{
15412 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15413 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15414 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15415
15416 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15417 ins_encode %{
15418 int icnt2 = (int)$int_cnt2$$constant;
15419 if (icnt2 >= 8) {
15420 // IndexOf for constant substrings with size >= 8 elements
15421 // which don't need to be loaded through stack.
15422 __ string_indexofC8($str1$$Register, $str2$$Register,
15423 $cnt1$$Register, $cnt2$$Register,
15424 icnt2, $result$$Register,
15425 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15426 } else {
15427 // Small strings are loaded through stack if they cross page boundary.
15428 __ string_indexof($str1$$Register, $str2$$Register,
15429 $cnt1$$Register, $cnt2$$Register,
15430 icnt2, $result$$Register,
15431 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15432 }
15433 %}
15434 ins_pipe( pipe_slow );
15435 %}
15436
15437 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15438 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15439 %{
15440 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15441 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15442 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15443
15444 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15445 ins_encode %{
15446 __ string_indexof($str1$$Register, $str2$$Register,
15447 $cnt1$$Register, $cnt2$$Register,
15448 (-1), $result$$Register,
15449 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15450 %}
15451 ins_pipe( pipe_slow );
15452 %}
15453
15454 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15455 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15456 %{
15457 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15458 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15459 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15460
15461 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15462 ins_encode %{
15463 __ string_indexof($str1$$Register, $str2$$Register,
15464 $cnt1$$Register, $cnt2$$Register,
15465 (-1), $result$$Register,
15466 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15467 %}
15468 ins_pipe( pipe_slow );
15469 %}
15470
15471 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15472 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15473 %{
15474 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15475 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15476 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15477
15478 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15479 ins_encode %{
15480 __ string_indexof($str1$$Register, $str2$$Register,
15481 $cnt1$$Register, $cnt2$$Register,
15482 (-1), $result$$Register,
15483 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15484 %}
15485 ins_pipe( pipe_slow );
15486 %}
15487
15488 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15489 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15490 %{
15491 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15492 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15493 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15494 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15495 ins_encode %{
15496 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15497 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15498 %}
15499 ins_pipe( pipe_slow );
15500 %}
15501
15502 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15503 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15504 %{
15505 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15506 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15507 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15508 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15509 ins_encode %{
15510 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15511 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15512 %}
15513 ins_pipe( pipe_slow );
15514 %}
15515
15516 // fast string equals
15517 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15518 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15519 %{
15520 predicate(!VM_Version::supports_avx512vlbw());
15521 match(Set result (StrEquals (Binary str1 str2) cnt));
15522 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15523
15524 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15525 ins_encode %{
15526 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15527 $cnt$$Register, $result$$Register, $tmp3$$Register,
15528 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15529 %}
15530 ins_pipe( pipe_slow );
15531 %}
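
// knoreg here is the "no mask register" sentinel: this non-EVEX variant
// tells arrays_equals to skip the AVX-512 masked tail handling, while the
// _evex variant below supplies a real kReg temporary in its place.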
15532
15533 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15534 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15535 %{
15536 predicate(VM_Version::supports_avx512vlbw());
15537 match(Set result (StrEquals (Binary str1 str2) cnt));
15538 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15539
15540 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15541 ins_encode %{
15542 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15543 $cnt$$Register, $result$$Register, $tmp3$$Register,
15544 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15545 %}
15546 ins_pipe( pipe_slow );
15547 %}
15548
15549 // fast array equals
15550 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15551 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15552 %{
15553 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15554 match(Set result (AryEq ary1 ary2));
15555 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15556
15557 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15558 ins_encode %{
15559 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15560 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15561 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15562 %}
15563 ins_pipe( pipe_slow );
15564 %}
15565
15566 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15567 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15568 %{
15569 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15570 match(Set result (AryEq ary1 ary2));
15571 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15572
15573 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15574 ins_encode %{
15575 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15576 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15577 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15578 %}
15579 ins_pipe( pipe_slow );
15580 %}
15581
15582 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15583 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15584 %{
15585 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15586 match(Set result (AryEq ary1 ary2));
15587 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15588
15589 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15590 ins_encode %{
15591 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15592 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15593 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15594 %}
15595 ins_pipe( pipe_slow );
15596 %}
15597
15598 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15599 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15600 %{
15601 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15602 match(Set result (AryEq ary1 ary2));
15603 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15604
15605 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15606 ins_encode %{
15607 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15608 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15609 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15610 %}
15611 ins_pipe( pipe_slow );
15612 %}
15613
15614 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15615 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15616 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15617 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15618 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15619 %{
15620 predicate(UseAVX >= 2);
15621 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15622 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15623 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15624 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15625 USE basic_type, KILL cr);
15626
15627 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15628 ins_encode %{
15629 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15630 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15631 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15632 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15633 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15634 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15635 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15636 %}
15637 ins_pipe( pipe_slow );
15638 %}
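
// For reference, the scalar recurrence being vectorized is
//   h = 31 * h + a[i]          for i = 0 .. cnt1-1
// i.e. result = h0 * 31^n + sum(a[i] * 31^(n-1-i)). The many vector
// temporaries hold parallel partial sums and powers of 31 so multiple
// lanes advance per iteration; $basic_type selects the element load
// width (byte/short/char/int).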
15639
15640 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15641 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15642 %{
15643 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15644 match(Set result (CountPositives ary1 len));
15645 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15646
15647 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15648 ins_encode %{
15649 __ count_positives($ary1$$Register, $len$$Register,
15650 $result$$Register, $tmp3$$Register,
15651 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15652 %}
15653 ins_pipe( pipe_slow );
15654 %}
15655
15656 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15657 legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15658 %{
15659 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15660 match(Set result (CountPositives ary1 len));
15661 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15662
15663 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15664 ins_encode %{
15665 __ count_positives($ary1$$Register, $len$$Register,
15666 $result$$Register, $tmp3$$Register,
15667 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15668 %}
15669 ins_pipe( pipe_slow );
15670 %}
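
// Semantics note: CountPositives yields the length of the leading run of
// non-negative bytes (values 0x00..0x7f). A result equal to $len means the
// whole array is "positive", which is what lets String coding take the
// Latin-1/ASCII fast path.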
15671
15672 // fast char[] to byte[] compression
15673 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15674 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15675 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15676 match(Set result (StrCompressedCopy src (Binary dst len)));
15677 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15678 USE_KILL len, KILL tmp5, KILL cr);
15679
15680 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15681 ins_encode %{
15682 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15683 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15684 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15685 knoreg, knoreg);
15686 %}
15687 ins_pipe( pipe_slow );
15688 %}
15689
15690 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15691 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15692 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15693 match(Set result (StrCompressedCopy src (Binary dst len)));
15694 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15695 USE_KILL len, KILL tmp5, KILL cr);
15696
15697 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15698 ins_encode %{
15699 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15700 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15701 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15702 $ktmp1$$KRegister, $ktmp2$$KRegister);
15703 %}
15704 ins_pipe( pipe_slow );
15705 %}
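
// Rough contract (see char_array_compress in c2_MacroAssembler for the
// exact result encoding): compression succeeds only if every char fits in
// one byte (<= 0xff), and $result tells the caller how far the copy got,
// so a char that cannot be compressed is detectable without a second scan.
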
15706 // fast byte[] to char[] inflation
15707 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15708 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15709 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15710 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15711 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15712
15713 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15714 ins_encode %{
15715 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15716 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15717 %}
15718 ins_pipe( pipe_slow );
15719 %}
15720
15721 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15722 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15723 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15724 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15725 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15726
15727 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15728 ins_encode %{
15729 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15730 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15731 %}
15732 ins_pipe( pipe_slow );
15733 %}
15734
15735 // encode char[] to byte[] in ISO_8859_1
15736 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15737 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15738 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15739 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15740 match(Set result (EncodeISOArray src (Binary dst len)));
15741 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15742
15743 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15744 ins_encode %{
15745 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15746 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15747 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15748 %}
15749 ins_pipe( pipe_slow );
15750 %}
15751
15752 // encode char[] to byte[] in ASCII
15753 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15754 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15755 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15756 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15757 match(Set result (EncodeISOArray src (Binary dst len)));
15758 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15759
15760 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15761 ins_encode %{
15762 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15763 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15764 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15765 %}
15766 ins_pipe( pipe_slow );
15767 %}
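
// The ISO and ASCII encodings share one assembler routine; the trailing
// bool selects the limit: false caps chars below 0x100 (ISO-8859-1), true
// below 0x80 (ASCII). $result is the number of chars actually encoded, so
// the caller can tell whether the whole input fit.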
15768
15769 //----------Overflow Math Instructions-----------------------------------------
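//
// These match the OverflowAdd/Sub/Mul ideal nodes that C2 builds for
// Math.addExact, subtractExact, multiplyExact and negateExact. Each form
// exists only to produce a flags result: the variants that reuse $op1
// clobber it (hence USE_KILL), and the imm forms compute into a TEMP so
// the inputs survive. The matching If branches on the overflow condition,
// typically into an uncommon trap.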
15770
15771 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15772 %{
15773 match(Set cr (OverflowAddI op1 op2));
15774 effect(DEF cr, USE_KILL op1, USE op2);
15775
15776 format %{ "addl $op1, $op2\t# overflow check int" %}
15777
15778 ins_encode %{
15779 __ addl($op1$$Register, $op2$$Register);
15780 %}
15781 ins_pipe(ialu_reg_reg);
15782 %}
15783
15784 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15785 %{
15786 match(Set cr (OverflowAddI op1 op2));
15787 effect(DEF cr, USE_KILL op1, USE op2);
15788
15789 format %{ "addl $op1, $op2\t# overflow check int" %}
15790
15791 ins_encode %{
15792 __ addl($op1$$Register, $op2$$constant);
15793 %}
15794 ins_pipe(ialu_reg_reg);
15795 %}
15796
15797 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15798 %{
15799 match(Set cr (OverflowAddL op1 op2));
15800 effect(DEF cr, USE_KILL op1, USE op2);
15801
15802 format %{ "addq $op1, $op2\t# overflow check long" %}
15803 ins_encode %{
15804 __ addq($op1$$Register, $op2$$Register);
15805 %}
15806 ins_pipe(ialu_reg_reg);
15807 %}
15808
15809 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15810 %{
15811 match(Set cr (OverflowAddL op1 op2));
15812 effect(DEF cr, USE_KILL op1, USE op2);
15813
15814 format %{ "addq $op1, $op2\t# overflow check long" %}
15815 ins_encode %{
15816 __ addq($op1$$Register, $op2$$constant);
15817 %}
15818 ins_pipe(ialu_reg_reg);
15819 %}
15820
15821 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15822 %{
15823 match(Set cr (OverflowSubI op1 op2));
15824
15825 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15826 ins_encode %{
15827 __ cmpl($op1$$Register, $op2$$Register);
15828 %}
15829 ins_pipe(ialu_reg_reg);
15830 %}
15831
15832 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15833 %{
15834 match(Set cr (OverflowSubI op1 op2));
15835
15836 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15837 ins_encode %{
15838 __ cmpl($op1$$Register, $op2$$constant);
15839 %}
15840 ins_pipe(ialu_reg_reg);
15841 %}
15842
15843 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15844 %{
15845 match(Set cr (OverflowSubL op1 op2));
15846
15847 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15848 ins_encode %{
15849 __ cmpq($op1$$Register, $op2$$Register);
15850 %}
15851 ins_pipe(ialu_reg_reg);
15852 %}
15853
15854 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15855 %{
15856 match(Set cr (OverflowSubL op1 op2));
15857
15858 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15859 ins_encode %{
15860 __ cmpq($op1$$Register, $op2$$constant);
15861 %}
15862 ins_pipe(ialu_reg_reg);
15863 %}
15864
15865 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15866 %{
15867 match(Set cr (OverflowSubI zero op2));
15868 effect(DEF cr, USE_KILL op2);
15869
15870 format %{ "negl $op2\t# overflow check int" %}
15871 ins_encode %{
15872 __ negl($op2$$Register);
15873 %}
15874 ins_pipe(ialu_reg_reg);
15875 %}
15876
15877 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15878 %{
15879 match(Set cr (OverflowSubL zero op2));
15880 effect(DEF cr, USE_KILL op2);
15881
15882 format %{ "negq $op2\t# overflow check long" %}
15883 ins_encode %{
15884 __ negq($op2$$Register);
15885 %}
15886 ins_pipe(ialu_reg_reg);
15887 %}
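
// negl/negq set OF exactly when the operand is the most negative value
// (0x80000000 resp. 0x8000000000000000), the one int/long whose negation
// overflows; so a single neg both performs 0 - $op2 and flags the
// overflowing case.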
15888
15889 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15890 %{
15891 match(Set cr (OverflowMulI op1 op2));
15892 effect(DEF cr, USE_KILL op1, USE op2);
15893
15894 format %{ "imull $op1, $op2\t# overflow check int" %}
15895 ins_encode %{
15896 __ imull($op1$$Register, $op2$$Register);
15897 %}
15898 ins_pipe(ialu_reg_reg_alu0);
15899 %}
15900
15901 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15902 %{
15903 match(Set cr (OverflowMulI op1 op2));
15904 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15905
15906 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
15907 ins_encode %{
15908 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15909 %}
15910 ins_pipe(ialu_reg_reg_alu0);
15911 %}
15912
15913 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15914 %{
15915 match(Set cr (OverflowMulL op1 op2));
15916 effect(DEF cr, USE_KILL op1, USE op2);
15917
15918 format %{ "imulq $op1, $op2\t# overflow check long" %}
15919 ins_encode %{
15920 __ imulq($op1$$Register, $op2$$Register);
15921 %}
15922 ins_pipe(ialu_reg_reg_alu0);
15923 %}
15924
15925 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15926 %{
15927 match(Set cr (OverflowMulL op1 op2));
15928 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15929
15930 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
15931 ins_encode %{
15932 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15933 %}
15934 ins_pipe(ialu_reg_reg_alu0);
15935 %}
15936
15937
15938 //----------Control Flow Instructions------------------------------------------
15939 // Signed compare Instructions
15940
15941 // XXX more variants!!
15942 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15943 %{
15944 match(Set cr (CmpI op1 op2));
15945 effect(DEF cr, USE op1, USE op2);
15946
15947 format %{ "cmpl $op1, $op2" %}
15948 ins_encode %{
15949 __ cmpl($op1$$Register, $op2$$Register);
15950 %}
15951 ins_pipe(ialu_cr_reg_reg);
15952 %}
15953
15954 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15955 %{
15956 match(Set cr (CmpI op1 op2));
15957
15958 format %{ "cmpl $op1, $op2" %}
15959 ins_encode %{
15960 __ cmpl($op1$$Register, $op2$$constant);
15961 %}
15962 ins_pipe(ialu_cr_reg_imm);
15963 %}
15964
15965 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15966 %{
15967 match(Set cr (CmpI op1 (LoadI op2)));
15968
15969 ins_cost(500); // XXX
15970 format %{ "cmpl $op1, $op2" %}
15971 ins_encode %{
15972 __ cmpl($op1$$Register, $op2$$Address);
15973 %}
15974 ins_pipe(ialu_cr_reg_mem);
15975 %}
15976
15977 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15978 %{
15979 match(Set cr (CmpI src zero));
15980
15981 format %{ "testl $src, $src" %}
15982 ins_encode %{
15983 __ testl($src$$Register, $src$$Register);
15984 %}
15985 ins_pipe(ialu_cr_reg_imm);
15986 %}
15987
15988 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15989 %{
15990 match(Set cr (CmpI (AndI src con) zero));
15991
15992 format %{ "testl $src, $con" %}
15993 ins_encode %{
15994 __ testl($src$$Register, $con$$constant);
15995 %}
15996 ins_pipe(ialu_cr_reg_imm);
15997 %}
15998
15999 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16000 %{
16001 match(Set cr (CmpI (AndI src1 src2) zero));
16002
16003 format %{ "testl $src1, $src2" %}
16004 ins_encode %{
16005 __ testl($src1$$Register, $src2$$Register);
16006 %}
16007 ins_pipe(ialu_cr_reg_imm);
16008 %}
16009
16010 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16011 %{
16012 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16013
16014 format %{ "testl $src, $mem" %}
16015 ins_encode %{
16016 __ testl($src$$Register, $mem$$Address);
16017 %}
16018 ins_pipe(ialu_cr_reg_mem);
16019 %}
16020
16021 // Unsigned compare Instructions; really, same as signed except they
16022 // produce an rFlagsRegU instead of rFlagsReg.
16023 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16024 %{
16025 match(Set cr (CmpU op1 op2));
16026
16027 format %{ "cmpl $op1, $op2\t# unsigned" %}
16028 ins_encode %{
16029 __ cmpl($op1$$Register, $op2$$Register);
16030 %}
16031 ins_pipe(ialu_cr_reg_reg);
16032 %}
16033
16034 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16035 %{
16036 match(Set cr (CmpU op1 op2));
16037
16038 format %{ "cmpl $op1, $op2\t# unsigned" %}
16039 ins_encode %{
16040 __ cmpl($op1$$Register, $op2$$constant);
16041 %}
16042 ins_pipe(ialu_cr_reg_imm);
16043 %}
16044
16045 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16046 %{
16047 match(Set cr (CmpU op1 (LoadI op2)));
16048
16049 ins_cost(500); // XXX
16050 format %{ "cmpl $op1, $op2\t# unsigned" %}
16051 ins_encode %{
16052 __ cmpl($op1$$Register, $op2$$Address);
16053 %}
16054 ins_pipe(ialu_cr_reg_mem);
16055 %}
16056
16057 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16058 %{
16059 match(Set cr (CmpU src zero));
16060
16061 format %{ "testl $src, $src\t# unsigned" %}
16062 ins_encode %{
16063 __ testl($src$$Register, $src$$Register);
16064 %}
16065 ins_pipe(ialu_cr_reg_imm);
16066 %}
16067
16068 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16069 %{
16070 match(Set cr (CmpP op1 op2));
16071
16072 format %{ "cmpq $op1, $op2\t# ptr" %}
16073 ins_encode %{
16074 __ cmpq($op1$$Register, $op2$$Register);
16075 %}
16076 ins_pipe(ialu_cr_reg_reg);
16077 %}
16078
16079 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16080 %{
16081 match(Set cr (CmpP op1 (LoadP op2)));
16082 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16083
16084 ins_cost(500); // XXX
16085 format %{ "cmpq $op1, $op2\t# ptr" %}
16086 ins_encode %{
16087 __ cmpq($op1$$Register, $op2$$Address);
16088 %}
16089 ins_pipe(ialu_cr_reg_mem);
16090 %}
16091
16092 // XXX this is generalized by compP_rReg_mem???
16093 // Compare raw pointer (used in out-of-heap check).
16094 // Only works because non-oop pointers must be raw pointers
16095 // and raw pointers have no anti-dependencies.
16096 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16097 %{
16098 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16099 n->in(2)->as_Load()->barrier_data() == 0);
16100 match(Set cr (CmpP op1 (LoadP op2)));
16101
16102 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16103 ins_encode %{
16104 __ cmpq($op1$$Register, $op2$$Address);
16105 %}
16106 ins_pipe(ialu_cr_reg_mem);
16107 %}
16108
16109 // This will generate a signed flags result. This should be OK since
16110 // any compare to a zero should be eq/neq.
16111 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16112 %{
16113 match(Set cr (CmpP src zero));
16114
16115 format %{ "testq $src, $src\t# ptr" %}
16116 ins_encode %{
16117 __ testq($src$$Register, $src$$Register);
16118 %}
16119 ins_pipe(ialu_cr_reg_imm);
16120 %}
16121
16122 // This will generate a signed flags result. This should be OK since
16123 // any compare to a zero should be eq/neq.
16124 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16125 %{
16126 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16127 n->in(1)->as_Load()->barrier_data() == 0);
16128 match(Set cr (CmpP (LoadP op) zero));
16129
16130 ins_cost(500); // XXX
16131 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16132 ins_encode %{
16133 __ testq($op$$Address, 0xFFFFFFFF);
16134 %}
16135 ins_pipe(ialu_cr_reg_imm);
16136 %}
16137
16138 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16139 %{
16140 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16141 n->in(1)->as_Load()->barrier_data() == 0);
16142 match(Set cr (CmpP (LoadP mem) zero));
16143
16144 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16145 ins_encode %{
16146 __ cmpq(r12, $mem$$Address);
16147 %}
16148 ins_pipe(ialu_cr_reg_mem);
16149 %}
16150
16151 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16152 %{
16153 match(Set cr (CmpN op1 op2));
16154
16155 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16156 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16157 ins_pipe(ialu_cr_reg_reg);
16158 %}
16159
16160 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16161 %{
16162 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16163 match(Set cr (CmpN src (LoadN mem)));
16164
16165 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16166 ins_encode %{
16167 __ cmpl($src$$Register, $mem$$Address);
16168 %}
16169 ins_pipe(ialu_cr_reg_mem);
16170 %}
16171
16172 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16173 match(Set cr (CmpN op1 op2));
16174
16175 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16176 ins_encode %{
16177 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16178 %}
16179 ins_pipe(ialu_cr_reg_imm);
16180 %}
16181
16182 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16183 %{
16184 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16185 match(Set cr (CmpN src (LoadN mem)));
16186
16187 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16188 ins_encode %{
16189 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16190 %}
16191 ins_pipe(ialu_cr_reg_mem);
16192 %}
16193
16194 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16195 match(Set cr (CmpN op1 op2));
16196
16197 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16198 ins_encode %{
16199 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16200 %}
16201 ins_pipe(ialu_cr_reg_imm);
16202 %}
16203
16204 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16205 %{
16206 predicate(!UseCompactObjectHeaders);
16207 match(Set cr (CmpN src (LoadNKlass mem)));
16208
16209 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16210 ins_encode %{
16211 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16212 %}
16213 ins_pipe(ialu_cr_reg_mem);
16214 %}
16215
16216 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16217 match(Set cr (CmpN src zero));
16218
16219 format %{ "testl $src, $src\t# compressed ptr" %}
16220 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16221 ins_pipe(ialu_cr_reg_imm);
16222 %}
16223
16224 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16225 %{
16226 predicate(CompressedOops::base() != nullptr &&
16227 n->in(1)->as_Load()->barrier_data() == 0);
16228 match(Set cr (CmpN (LoadN mem) zero));
16229
16230 ins_cost(500); // XXX
16231 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16232 ins_encode %{
16233 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16234 %}
16235 ins_pipe(ialu_cr_reg_mem);
16236 %}
16237
16238 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16239 %{
16240 predicate(CompressedOops::base() == nullptr &&
16241 n->in(1)->as_Load()->barrier_data() == 0);
16242 match(Set cr (CmpN (LoadN mem) zero));
16243
16244 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16245 ins_encode %{
16246 __ cmpl(r12, $mem$$Address);
16247 %}
16248 ins_pipe(ialu_cr_reg_mem);
16249 %}
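
// Decode reminder: with compressed oops an object address is
//   oop = CompressedOops::base() + ((uint64_t)narrow_oop << shift)
// so narrow_oop == 0 iff oop == null. When the base is null, r12 (the
// reserved heapbase register) holds the constant zero, which is why the
// *_reg0 forms above can compare memory against r12 instead of
// materializing a zero immediate.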
16250
16251 // Yanked all unsigned pointer compare operations.
16252 // Pointer compares are done with CmpP which is already unsigned.
16253
16254 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16255 %{
16256 match(Set cr (CmpL op1 op2));
16257
16258 format %{ "cmpq $op1, $op2" %}
16259 ins_encode %{
16260 __ cmpq($op1$$Register, $op2$$Register);
16261 %}
16262 ins_pipe(ialu_cr_reg_reg);
16263 %}
16264
16265 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16266 %{
16267 match(Set cr (CmpL op1 op2));
16268
16269 format %{ "cmpq $op1, $op2" %}
16270 ins_encode %{
16271 __ cmpq($op1$$Register, $op2$$constant);
16272 %}
16273 ins_pipe(ialu_cr_reg_imm);
16274 %}
16275
16276 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16277 %{
16278 match(Set cr (CmpL op1 (LoadL op2)));
16279
16280 format %{ "cmpq $op1, $op2" %}
16281 ins_encode %{
16282 __ cmpq($op1$$Register, $op2$$Address);
16283 %}
16284 ins_pipe(ialu_cr_reg_mem);
16285 %}
16286
16287 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16288 %{
16289 match(Set cr (CmpL src zero));
16290
16291 format %{ "testq $src, $src" %}
16292 ins_encode %{
16293 __ testq($src$$Register, $src$$Register);
16294 %}
16295 ins_pipe(ialu_cr_reg_imm);
16296 %}
16297
16298 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16299 %{
16300 match(Set cr (CmpL (AndL src con) zero));
16301
16302 format %{ "testq $src, $con\t# long" %}
16303 ins_encode %{
16304 __ testq($src$$Register, $con$$constant);
16305 %}
16306 ins_pipe(ialu_cr_reg_imm);
16307 %}
16308
16309 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16310 %{
16311 match(Set cr (CmpL (AndL src1 src2) zero));
16312
16313 format %{ "testq $src1, $src2\t# long" %}
16314 ins_encode %{
16315 __ testq($src1$$Register, $src2$$Register);
16316 %}
16317 ins_pipe(ialu_cr_reg_imm);
16318 %}
16319
16320 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16321 %{
16322 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16323
16324 format %{ "testq $src, $mem" %}
16325 ins_encode %{
16326 __ testq($src$$Register, $mem$$Address);
16327 %}
16328 ins_pipe(ialu_cr_reg_mem);
16329 %}
16330
16331 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16332 %{
16333 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16334
16335 format %{ "testq $src, $mem" %}
16336 ins_encode %{
16337 __ testq($src$$Register, $mem$$Address);
16338 %}
16339 ins_pipe(ialu_cr_reg_mem);
16340 %}
16341
16342 // Manifest a CmpU result in an integer register. Very painful.
16343 // This is the test to avoid.
16344 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16345 %{
16346 match(Set dst (CmpU3 src1 src2));
16347 effect(KILL flags);
16348
16349 ins_cost(275); // XXX
16350 format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
16351 "movl $dst, -1\n\t"
16352 "jb,u done\n\t"
16353 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16354 "done:" %}
16355 ins_encode %{
16356 Label done;
16357 __ cmpl($src1$$Register, $src2$$Register);
16358 __ movl($dst$$Register, -1);
16359 __ jccb(Assembler::below, done);
16360 __ setcc(Assembler::notZero, $dst$$Register);
16361 __ bind(done);
16362 %}
16363 ins_pipe(pipe_slow);
16364 %}
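
// Net effect (illustrative): dst = (src1 <u src2) ? -1 : (src1 == src2 ? 0 : 1).
// The movl of -1 is safe between the compare and the branch because mov
// does not modify the flags.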
16365
16366 // Manifest a CmpL result in an integer register. Very painful.
16367 // This is the test to avoid.
16368 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16369 %{
16370 match(Set dst (CmpL3 src1 src2));
16371 effect(KILL flags);
16372
16373 ins_cost(275); // XXX
16374 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16375 "movl $dst, -1\n\t"
16376 "jl,s done\n\t"
16377 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16378 "done:" %}
16379 ins_encode %{
16380 Label done;
16381 __ cmpq($src1$$Register, $src2$$Register);
16382 __ movl($dst$$Register, -1);
16383 __ jccb(Assembler::less, done);
16384 __ setcc(Assembler::notZero, $dst$$Register);
16385 __ bind(done);
16386 %}
16387 ins_pipe(pipe_slow);
16388 %}
16389
16390 // Manifest a CmpUL result in an integer register. Very painful.
16391 // This is the test to avoid.
16392 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16393 %{
16394 match(Set dst (CmpUL3 src1 src2));
16395 effect(KILL flags);
16396
16397 ins_cost(275); // XXX
16398 format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
16399 "movl $dst, -1\n\t"
16400 "jb,u done\n\t"
16401 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16402 "done:" %}
16403 ins_encode %{
16404 Label done;
16405 __ cmpq($src1$$Register, $src2$$Register);
16406 __ movl($dst$$Register, -1);
16407 __ jccb(Assembler::below, done);
16408 __ setcc(Assembler::notZero, $dst$$Register);
16409 __ bind(done);
16410 %}
16411 ins_pipe(pipe_slow);
16412 %}
16413
16414 // Unsigned long compare Instructions; really, same as signed long except they
16415 // produce an rFlagsRegU instead of rFlagsReg.
16416 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16417 %{
16418 match(Set cr (CmpUL op1 op2));
16419
16420 format %{ "cmpq $op1, $op2\t# unsigned" %}
16421 ins_encode %{
16422 __ cmpq($op1$$Register, $op2$$Register);
16423 %}
16424 ins_pipe(ialu_cr_reg_reg);
16425 %}
16426
16427 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16428 %{
16429 match(Set cr (CmpUL op1 op2));
16430
16431 format %{ "cmpq $op1, $op2\t# unsigned" %}
16432 ins_encode %{
16433 __ cmpq($op1$$Register, $op2$$constant);
16434 %}
16435 ins_pipe(ialu_cr_reg_imm);
16436 %}
16437
16438 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16439 %{
16440 match(Set cr (CmpUL op1 (LoadL op2)));
16441
16442 format %{ "cmpq $op1, $op2\t# unsigned" %}
16443 ins_encode %{
16444 __ cmpq($op1$$Register, $op2$$Address);
16445 %}
16446 ins_pipe(ialu_cr_reg_mem);
16447 %}
16448
16449 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16450 %{
16451 match(Set cr (CmpUL src zero));
16452
16453 format %{ "testq $src, $src\t# unsigned" %}
16454 ins_encode %{
16455 __ testq($src$$Register, $src$$Register);
16456 %}
16457 ins_pipe(ialu_cr_reg_imm);
16458 %}
16459
16460 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16461 %{
16462 match(Set cr (CmpI (LoadB mem) imm));
16463
16464 ins_cost(125);
16465 format %{ "cmpb $mem, $imm" %}
16466 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16467 ins_pipe(ialu_cr_reg_mem);
16468 %}
16469
16470 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16471 %{
16472 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16473
16474 ins_cost(125);
16475 format %{ "testb $mem, $imm\t# ubyte" %}
16476 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16477 ins_pipe(ialu_cr_reg_mem);
16478 %}
16479
16480 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16481 %{
16482 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16483
16484 ins_cost(125);
16485 format %{ "testb $mem, $imm\t# byte" %}
16486 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16487 ins_pipe(ialu_cr_reg_mem);
16488 %}
16489
16490 //----------Max and Min--------------------------------------------------------
16491 // Min Instructions
16492
16493 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16494 %{
16495 predicate(!UseAPX);
16496 effect(USE_DEF dst, USE src, USE cr);
16497
16498 format %{ "cmovlgt $dst, $src\t# min" %}
16499 ins_encode %{
16500 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16501 %}
16502 ins_pipe(pipe_cmov_reg);
16503 %}
16504
16505 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16506 %{
16507 predicate(UseAPX);
16508 effect(DEF dst, USE src1, USE src2, USE cr);
16509
16510 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16511 ins_encode %{
16512 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16513 %}
16514 ins_pipe(pipe_cmov_reg);
16515 %}
16516
16517 instruct minI_rReg(rRegI dst, rRegI src)
16518 %{
16519 predicate(!UseAPX);
16520 match(Set dst (MinI dst src));
16521
16522 ins_cost(200);
16523 expand %{
16524 rFlagsReg cr;
16525 compI_rReg(cr, dst, src);
16526 cmovI_reg_g(dst, src, cr);
16527 %}
16528 %}
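
// The expand above reads: compare dst with src, then cmovg replaces dst
// with src only when dst > src, leaving min(dst, src) in dst with no
// branch. maxI_rReg below is the mirror image built on cmovl (less).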
16529
16530 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16531 %{
16532 predicate(UseAPX);
16533 match(Set dst (MinI src1 src2));
16534 effect(DEF dst, USE src1, USE src2);
16535
16536 ins_cost(200);
16537 expand %{
16538 rFlagsReg cr;
16539 compI_rReg(cr, src1, src2);
16540 cmovI_reg_g_ndd(dst, src1, src2, cr);
16541 %}
16542 %}
16543
16544 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16545 %{
16546 predicate(!UseAPX);
16547 effect(USE_DEF dst, USE src, USE cr);
16548
16549 format %{ "cmovllt $dst, $src\t# max" %}
16550 ins_encode %{
16551 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16552 %}
16553 ins_pipe(pipe_cmov_reg);
16554 %}
16555
16556 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16557 %{
16558 predicate(UseAPX);
16559 effect(DEF dst, USE src1, USE src2, USE cr);
16560
16561 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16562 ins_encode %{
16563 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16564 %}
16565 ins_pipe(pipe_cmov_reg);
16566 %}
16567
16568 instruct maxI_rReg(rRegI dst, rRegI src)
16569 %{
16570 predicate(!UseAPX);
16571 match(Set dst (MaxI dst src));
16572
16573 ins_cost(200);
16574 expand %{
16575 rFlagsReg cr;
16576 compI_rReg(cr, dst, src);
16577 cmovI_reg_l(dst, src, cr);
16578 %}
16579 %}
16580
16581 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16582 %{
16583 predicate(UseAPX);
16584 match(Set dst (MaxI src1 src2));
16585 effect(DEF dst, USE src1, USE src2);
16586
16587 ins_cost(200);
16588 expand %{
16589 rFlagsReg cr;
16590 compI_rReg(cr, src1, src2);
16591 cmovI_reg_l_ndd(dst, src1, src2, cr);
16592 %}
16593 %}
16594
16595 // ============================================================================
16596 // Branch Instructions
16597
16598 // Jump Direct - Label defines a relative address from JMP+1
16599 instruct jmpDir(label labl)
16600 %{
16601 match(Goto);
16602 effect(USE labl);
16603
16604 ins_cost(300);
16605 format %{ "jmp $labl" %}
16606 size(5);
16607 ins_encode %{
16608 Label* L = $labl$$label;
16609 __ jmp(*L, false); // Always long jump
16610 %}
16611 ins_pipe(pipe_jmp);
16612 %}
16613
16614 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16615 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16616 %{
16617 match(If cop cr);
16618 effect(USE labl);
16619
16620 ins_cost(300);
16621 format %{ "j$cop $labl" %}
16622 size(6);
16623 ins_encode %{
16624 Label* L = $labl$$label;
16625 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16626 %}
16627 ins_pipe(pipe_jcc);
16628 %}
16629
16630 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16631 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16632 %{
16633 match(CountedLoopEnd cop cr);
16634 effect(USE labl);
16635
16636 ins_cost(300);
16637 format %{ "j$cop $labl\t# loop end" %}
16638 size(6);
16639 ins_encode %{
16640 Label* L = $labl$$label;
16641 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16642 %}
16643 ins_pipe(pipe_jcc);
16644 %}
16645
16646 // Jump Direct Conditional - using unsigned comparison
16647 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16648 match(If cop cmp);
16649 effect(USE labl);
16650
16651 ins_cost(300);
16652 format %{ "j$cop,u $labl" %}
16653 size(6);
16654 ins_encode %{
16655 Label* L = $labl$$label;
16656 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16657 %}
16658 ins_pipe(pipe_jcc);
16659 %}
16660
16661 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16662 match(If cop cmp);
16663 effect(USE labl);
16664
16665 ins_cost(200);
16666 format %{ "j$cop,u $labl" %}
16667 size(6);
16668 ins_encode %{
16669 Label* L = $labl$$label;
16670 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16671 %}
16672 ins_pipe(pipe_jcc);
16673 %}
16674
16675 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16676 match(If cop cmp);
16677 effect(USE labl);
16678
16679 ins_cost(200);
16680 format %{ $$template
16681 if ($cop$$cmpcode == Assembler::notEqual) {
16682 $$emit$$"jp,u $labl\n\t"
16683 $$emit$$"j$cop,u $labl"
16684 } else {
16685 $$emit$$"jp,u done\n\t"
16686 $$emit$$"j$cop,u $labl\n\t"
16687 $$emit$$"done:"
16688 }
16689 %}
16690 ins_encode %{
16691 Label* l = $labl$$label;
16692 if ($cop$$cmpcode == Assembler::notEqual) {
16693 __ jcc(Assembler::parity, *l, false);
16694 __ jcc(Assembler::notEqual, *l, false);
16695 } else if ($cop$$cmpcode == Assembler::equal) {
16696 Label done;
16697 __ jccb(Assembler::parity, done);
16698 __ jcc(Assembler::equal, *l, false);
16699 __ bind(done);
16700 } else {
16701 ShouldNotReachHere();
16702 }
16703 %}
16704 ins_pipe(pipe_jcc);
16705 %}
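
// Unordered floating-point compares set the parity flag, so this node
// folds NaN handling into the branch: for != a NaN must take the branch
// (jump on parity as well as on not-equal), while for == a NaN must fall
// through (a short jp over the je). Any other condition code here is a
// matcher bug, hence ShouldNotReachHere().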
16706
16707 // ============================================================================
16708 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16709 // superklass array for an instance of the superklass. Set a hidden
16710 // internal cache on a hit (cache is checked with exposed code in
16711 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16712 // encoding ALSO sets flags.
16713
16714 instruct partialSubtypeCheck(rdi_RegP result,
16715 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16716 rFlagsReg cr)
16717 %{
16718 match(Set result (PartialSubtypeCheck sub super));
16719 predicate(!UseSecondarySupersTable);
16720 effect(KILL rcx, KILL cr);
16721
16722 ins_cost(1100); // slightly larger than the next version
16723 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16724 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16725 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16726 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16727 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16728 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16729 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16730 "miss:\t" %}
16731
16732 ins_encode %{
16733 Label miss;
16734 // NB: Callers may assume that, when $result is a valid register,
16735 // check_klass_subtype_slow_path_linear sets it to a nonzero
16736 // value.
16737 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16738 $rcx$$Register, $result$$Register,
16739 nullptr, &miss,
16740 /*set_cond_codes:*/ true);
16741 __ xorptr($result$$Register, $result$$Register);
16742 __ bind(miss);
16743 %}
16744
16745 ins_pipe(pipe_slow);
16746 %}
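
// Roughly, a sketch of the slow path above (not the emitted code):
//   for (int i = 0; i < secondary_supers->length(); i++) {
//     if (secondary_supers->at(i) == super) { cache = super; return 0; }
//   }
//   return nonzero;   // and the flags reflect the same hit/miss result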
16747
16748 // ============================================================================
16749 // Two versions of hashtable-based partialSubtypeCheck, both used when
16750 // we need to search for a super class in the secondary supers array.
16751 // The first is used when we don't know _a priori_ the class being
16752 // searched for. The second, far more common, is used when we do know:
16753 // this is used for instanceof, checkcast, and any case where C2 can
16754 // determine it by constant propagation.
16755
16756 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16757 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16758 rFlagsReg cr)
16759 %{
16760 match(Set result (PartialSubtypeCheck sub super));
16761 predicate(UseSecondarySupersTable);
16762 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16763
16764 ins_cost(1000);
16765 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16766
16767 ins_encode %{
16768 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16769 $temp3$$Register, $temp4$$Register, $result$$Register);
16770 %}
16771
16772 ins_pipe(pipe_slow);
16773 %}
16774
16775 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16776 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16777 rFlagsReg cr)
16778 %{
16779 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16780 predicate(UseSecondarySupersTable);
16781 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16782
16783 ins_cost(700); // smaller than the next version
16784 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16785
16786 ins_encode %{
16787 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16788 if (InlineSecondarySupersTest) {
16789 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16790 $temp3$$Register, $temp4$$Register, $result$$Register,
16791 super_klass_slot);
16792 } else {
16793 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16794 }
16795 %}
16796
16797 ins_pipe(pipe_slow);
16798 %}
16799
16800 // ============================================================================
16801 // Branch Instructions -- short offset versions
16802 //
16803 // These instructions are used to replace jumps of a long offset (the default
16804 // match) with jumps of a shorter offset. These instructions are all tagged
16805 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16806 // match rules in general matching. Instead, the ADLC generates a conversion
16807 // method in the MachNode which can be used to do in-place replacement of the
16808 // long variant with the shorter variant. The compiler will determine if a
16809 // branch can be taken by the is_short_branch_offset() predicate in the machine
16810 // specific code section of the file.
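
// For example, jmpDir above is the 5-byte E9 rel32 encoding while
// jmpDir_short below is the 2-byte EB rel8 form; a conditional jcc shrinks
// from 6 bytes (0F 8x rel32) to 2 (7x rel8). The size() attributes on each
// long/short pair record exactly this difference.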
16811
16812 // Jump Direct - Label defines a relative address from JMP+1
16813 instruct jmpDir_short(label labl) %{
16814 match(Goto);
16815 effect(USE labl);
16816
16817 ins_cost(300);
16818 format %{ "jmp,s $labl" %}
16819 size(2);
16820 ins_encode %{
16821 Label* L = $labl$$label;
16822 __ jmpb(*L);
16823 %}
16824 ins_pipe(pipe_jmp);
16825 ins_short_branch(1);
16826 %}
16827
16828 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16829 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16830 match(If cop cr);
16831 effect(USE labl);
16832
16833 ins_cost(300);
16834 format %{ "j$cop,s $labl" %}
16835 size(2);
16836 ins_encode %{
16837 Label* L = $labl$$label;
16838 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16839 %}
16840 ins_pipe(pipe_jcc);
16841 ins_short_branch(1);
16842 %}
16843
16844 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16845 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16846 match(CountedLoopEnd cop cr);
16847 effect(USE labl);
16848
16849 ins_cost(300);
16850 format %{ "j$cop,s $labl\t# loop end" %}
16851 size(2);
16852 ins_encode %{
16853 Label* L = $labl$$label;
16854 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16855 %}
16856 ins_pipe(pipe_jcc);
16857 ins_short_branch(1);
16858 %}
16859
16860 // Jump Direct Conditional - using unsigned comparison
16861 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16862 match(If cop cmp);
16863 effect(USE labl);
16864
16865 ins_cost(300);
16866 format %{ "j$cop,us $labl" %}
16867 size(2);
16868 ins_encode %{
16869 Label* L = $labl$$label;
16870 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16871 %}
16872 ins_pipe(pipe_jcc);
16873 ins_short_branch(1);
16874 %}
16875
16876 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16877 match(If cop cmp);
16878 effect(USE labl);
16879
16880 ins_cost(300);
16881 format %{ "j$cop,us $labl" %}
16882 size(2);
16883 ins_encode %{
16884 Label* L = $labl$$label;
16885 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16886 %}
16887 ins_pipe(pipe_jcc);
16888 ins_short_branch(1);
16889 %}
16890
16891 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16892 match(If cop cmp);
16893 effect(USE labl);
16894
16895 ins_cost(300);
16896 format %{ $$template
16897 if ($cop$$cmpcode == Assembler::notEqual) {
16898 $$emit$$"jp,u,s $labl\n\t"
16899 $$emit$$"j$cop,u,s $labl"
16900 } else {
16901 $$emit$$"jp,u,s done\n\t"
16902 $$emit$$"j$cop,u,s $labl\n\t"
16903 $$emit$$"done:"
16904 }
16905 %}
16906 size(4);
16907 ins_encode %{
16908 Label* l = $labl$$label;
16909 if ($cop$$cmpcode == Assembler::notEqual) {
16910 __ jccb(Assembler::parity, *l);
16911 __ jccb(Assembler::notEqual, *l);
16912 } else if ($cop$$cmpcode == Assembler::equal) {
16913 Label done;
16914 __ jccb(Assembler::parity, done);
16915 __ jccb(Assembler::equal, *l);
16916 __ bind(done);
16917 } else {
16918 ShouldNotReachHere();
16919 }
16920 %}
16921 ins_pipe(pipe_jcc);
16922 ins_short_branch(1);
16923 %}
16924
16925 // ============================================================================
16926 // inlined locking and unlocking
16927
16928 instruct cmpFastLockLightweight(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16929 match(Set cr (FastLock object box));
16930 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16931 ins_cost(300);
16932 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16933 ins_encode %{
16934 __ fast_lock_lightweight($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16935 %}
16936 ins_pipe(pipe_slow);
16937 %}
16938
16939 instruct cmpFastUnlockLightweight(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16940 match(Set cr (FastUnlock object rax_reg));
16941 effect(TEMP tmp, USE_KILL rax_reg);
16942 ins_cost(300);
16943 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16944 ins_encode %{
16945 __ fast_unlock_lightweight($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16946 %}
16947 ins_pipe(pipe_slow);
16948 %}
16949
16950
16951 // ============================================================================
16952 // Safepoint Instructions
16953 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16954 %{
16955 match(SafePoint poll);
16956 effect(KILL cr, USE poll);
16957
16958 format %{ "testl rax, [$poll]\t"
16959 "# Safepoint: poll for GC" %}
16960 ins_cost(125);
16961 ins_encode %{
16962 __ relocate(relocInfo::poll_type);
16963 address pre_pc = __ pc();
16964 __ testl(rax, Address($poll$$Register, 0));
16965 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16966 %}
16967 ins_pipe(ialu_reg_mem);
16968 %}
16969
16970 instruct mask_all_evexL(kReg dst, rRegL src) %{
16971 match(Set dst (MaskAll src));
16972 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
16973 ins_encode %{
16974 int mask_len = Matcher::vector_length(this);
16975 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
16976 %}
16977 ins_pipe( pipe_slow );
16978 %}
16979
16980 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
16981 predicate(Matcher::vector_length(n) > 32);
16982 match(Set dst (MaskAll src));
16983 effect(TEMP tmp);
16984 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
16985 ins_encode %{
16986 int mask_len = Matcher::vector_length(this);
16987 __ movslq($tmp$$Register, $src$$Register);
16988 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
16989 %}
16990 ins_pipe( pipe_slow );
16991 %}
16992
16993 // ============================================================================
16994 // Procedure Call/Return Instructions
16995 // Call Java Static Instruction
16996 // Note: If this code changes, the corresponding ret_addr_offset() and
16997 // compute_padding() functions will have to be adjusted.
16998 instruct CallStaticJavaDirect(method meth) %{
16999 match(CallStaticJava);
17000 effect(USE meth);
17001
17002 ins_cost(300);
17003 format %{ "call,static " %}
17004 opcode(0xE8); /* E8 cd */
17005 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17006 ins_pipe(pipe_slow);
17007 ins_alignment(4);
17008 %}
17009
17010 // Call Java Dynamic Instruction
17011 // Note: If this code changes, the corresponding ret_addr_offset() and
17012 // compute_padding() functions will have to be adjusted.
17013 instruct CallDynamicJavaDirect(method meth)
17014 %{
17015 match(CallDynamicJava);
17016 effect(USE meth);
17017
17018 ins_cost(300);
17019 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17020 "call,dynamic " %}
17021 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17022 ins_pipe(pipe_slow);
17023 ins_alignment(4);
17024 %}
17025
17026 // Call Runtime Instruction
17027 instruct CallRuntimeDirect(method meth)
17028 %{
17029 match(CallRuntime);
17030 effect(USE meth);
17031
17032 ins_cost(300);
17033 format %{ "call,runtime " %}
17034 ins_encode(clear_avx, Java_To_Runtime(meth));
17035 ins_pipe(pipe_slow);
17036 %}
17037
17038 // Call runtime without safepoint
17039 instruct CallLeafDirect(method meth)
17040 %{
17041 match(CallLeaf);
17042 effect(USE meth);
17043
17044 ins_cost(300);
17045 format %{ "call_leaf,runtime " %}
17046 ins_encode(clear_avx, Java_To_Runtime(meth));
17047 ins_pipe(pipe_slow);
17048 %}
17049
17050 // Call runtime without safepoint and with vector arguments
17051 instruct CallLeafDirectVector(method meth)
17052 %{
17053 match(CallLeafVector);
17054 effect(USE meth);
17055
17056 ins_cost(300);
17057 format %{ "call_leaf,vector " %}
17058 ins_encode(Java_To_Runtime(meth));
17059 ins_pipe(pipe_slow);
17060 %}
17061
17062 // Call runtime without safepoint
17063 instruct CallLeafNoFPDirect(method meth)
17064 %{
17065 match(CallLeafNoFP);
17066 effect(USE meth);
17067
17068 ins_cost(300);
17069 format %{ "call_leaf_nofp,runtime " %}
17070 ins_encode(clear_avx, Java_To_Runtime(meth));
17071 ins_pipe(pipe_slow);
17072 %}
17073
17074 // Return Instruction
17075 // Remove the return address & jump to it.
17076 // Notice: We always emit a nop after a ret to make sure there is room
17077 // for safepoint patching
17078 instruct Ret()
17079 %{
17080 match(Return);
17081
17082 format %{ "ret" %}
17083 ins_encode %{
17084 __ ret(0);
17085 %}
17086 ins_pipe(pipe_jmp);
17087 %}
17088
17089 // Tail Call; Jump from runtime stub to Java code.
17090 // Also known as an 'interprocedural jump'.
17091 // Target of jump will eventually return to caller.
17092 // TailJump below removes the return address.
17093 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17094 // emitted just above the TailCall which has reset rbp to the caller state.
17095 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17096 %{
17097 match(TailCall jump_target method_ptr);
17098
17099 ins_cost(300);
17100 format %{ "jmp $jump_target\t# rbx holds method" %}
17101 ins_encode %{
17102 __ jmp($jump_target$$Register);
17103 %}
17104 ins_pipe(pipe_jmp);
17105 %}
17106
17107 // Tail Jump; remove the return address; jump to target.
17108 // TailCall above leaves the return address around.
17109 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17110 %{
17111 match(TailJump jump_target ex_oop);
17112
17113 ins_cost(300);
17114 format %{ "popq rdx\t# pop return address\n\t"
17115 "jmp $jump_target" %}
17116 ins_encode %{
17117 __ popq(as_Register(RDX_enc));
17118 __ jmp($jump_target$$Register);
17119 %}
17120 ins_pipe(pipe_jmp);
17121 %}
17122
17123 // Forward exception.
17124 instruct ForwardExceptionjmp()
17125 %{
17126 match(ForwardException);
17127
17128 format %{ "jmp forward_exception_stub" %}
17129 ins_encode %{
17130 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17131 %}
17132 ins_pipe(pipe_jmp);
17133 %}
17134
17135 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up
// just prior to jumping to this handler. No code is emitted.
17138 instruct CreateException(rax_RegP ex_oop)
17139 %{
17140 match(Set ex_oop (CreateEx));
17141
17142 size(0);
17143 // use the following format syntax
17144 format %{ "# exception oop is in rax; no code emitted" %}
17145 ins_encode();
17146 ins_pipe(empty);
17147 %}
17148
17149 // Rethrow exception:
17150 // The exception oop will come in the first argument position.
17151 // Then JUMP (not call) to the rethrow stub code.
17152 instruct RethrowException()
17153 %{
17154 match(Rethrow);
17155
17156 // use the following format syntax
17157 format %{ "jmp rethrow_stub" %}
17158 ins_encode %{
17159 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17160 %}
17161 ins_pipe(pipe_jmp);
17162 %}
17163
17164 // ============================================================================
17165 // This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
17168 instruct tlsLoadP(r15_RegP dst) %{
17169 match(Set dst (ThreadLocal));
17170 effect(DEF dst);
17171
17172 size(0);
17173 format %{ "# TLS is in R15" %}
17174 ins_encode( /*empty encoding*/ );
17175 ins_pipe(ialu_reg_reg);
17176 %}
17177
17178 instruct addF_reg(regF dst, regF src) %{
17179 predicate(UseAVX == 0);
17180 match(Set dst (AddF dst src));
17181
17182 format %{ "addss $dst, $src" %}
17183 ins_cost(150);
17184 ins_encode %{
17185 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17186 %}
17187 ins_pipe(pipe_slow);
17188 %}
17189
17190 instruct addF_mem(regF dst, memory src) %{
17191 predicate(UseAVX == 0);
17192 match(Set dst (AddF dst (LoadF src)));
17193
17194 format %{ "addss $dst, $src" %}
17195 ins_cost(150);
17196 ins_encode %{
17197 __ addss($dst$$XMMRegister, $src$$Address);
17198 %}
17199 ins_pipe(pipe_slow);
17200 %}
17201
17202 instruct addF_imm(regF dst, immF con) %{
17203 predicate(UseAVX == 0);
17204 match(Set dst (AddF dst con));
17205 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17206 ins_cost(150);
17207 ins_encode %{
17208 __ addss($dst$$XMMRegister, $constantaddress($con));
17209 %}
17210 ins_pipe(pipe_slow);
17211 %}
17212
17213 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17214 predicate(UseAVX > 0);
17215 match(Set dst (AddF src1 src2));
17216
17217 format %{ "vaddss $dst, $src1, $src2" %}
17218 ins_cost(150);
17219 ins_encode %{
17220 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17221 %}
17222 ins_pipe(pipe_slow);
17223 %}
17224
17225 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17226 predicate(UseAVX > 0);
17227 match(Set dst (AddF src1 (LoadF src2)));
17228
17229 format %{ "vaddss $dst, $src1, $src2" %}
17230 ins_cost(150);
17231 ins_encode %{
17232 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17233 %}
17234 ins_pipe(pipe_slow);
17235 %}
17236
17237 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17238 predicate(UseAVX > 0);
17239 match(Set dst (AddF src con));
17240
17241 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17242 ins_cost(150);
17243 ins_encode %{
17244 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17245 %}
17246 ins_pipe(pipe_slow);
17247 %}
17248
17249 instruct addD_reg(regD dst, regD src) %{
17250 predicate(UseAVX == 0);
17251 match(Set dst (AddD dst src));
17252
17253 format %{ "addsd $dst, $src" %}
17254 ins_cost(150);
17255 ins_encode %{
17256 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17257 %}
17258 ins_pipe(pipe_slow);
17259 %}
17260
17261 instruct addD_mem(regD dst, memory src) %{
17262 predicate(UseAVX == 0);
17263 match(Set dst (AddD dst (LoadD src)));
17264
17265 format %{ "addsd $dst, $src" %}
17266 ins_cost(150);
17267 ins_encode %{
17268 __ addsd($dst$$XMMRegister, $src$$Address);
17269 %}
17270 ins_pipe(pipe_slow);
17271 %}
17272
17273 instruct addD_imm(regD dst, immD con) %{
17274 predicate(UseAVX == 0);
17275 match(Set dst (AddD dst con));
17276 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17277 ins_cost(150);
17278 ins_encode %{
17279 __ addsd($dst$$XMMRegister, $constantaddress($con));
17280 %}
17281 ins_pipe(pipe_slow);
17282 %}
17283
17284 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17285 predicate(UseAVX > 0);
17286 match(Set dst (AddD src1 src2));
17287
17288 format %{ "vaddsd $dst, $src1, $src2" %}
17289 ins_cost(150);
17290 ins_encode %{
17291 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17292 %}
17293 ins_pipe(pipe_slow);
17294 %}
17295
17296 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17297 predicate(UseAVX > 0);
17298 match(Set dst (AddD src1 (LoadD src2)));
17299
17300 format %{ "vaddsd $dst, $src1, $src2" %}
17301 ins_cost(150);
17302 ins_encode %{
17303 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17304 %}
17305 ins_pipe(pipe_slow);
17306 %}
17307
17308 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17309 predicate(UseAVX > 0);
17310 match(Set dst (AddD src con));
17311
17312 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17313 ins_cost(150);
17314 ins_encode %{
17315 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17316 %}
17317 ins_pipe(pipe_slow);
17318 %}
17319
17320 instruct subF_reg(regF dst, regF src) %{
17321 predicate(UseAVX == 0);
17322 match(Set dst (SubF dst src));
17323
17324 format %{ "subss $dst, $src" %}
17325 ins_cost(150);
17326 ins_encode %{
17327 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17328 %}
17329 ins_pipe(pipe_slow);
17330 %}
17331
17332 instruct subF_mem(regF dst, memory src) %{
17333 predicate(UseAVX == 0);
17334 match(Set dst (SubF dst (LoadF src)));
17335
17336 format %{ "subss $dst, $src" %}
17337 ins_cost(150);
17338 ins_encode %{
17339 __ subss($dst$$XMMRegister, $src$$Address);
17340 %}
17341 ins_pipe(pipe_slow);
17342 %}
17343
17344 instruct subF_imm(regF dst, immF con) %{
17345 predicate(UseAVX == 0);
17346 match(Set dst (SubF dst con));
17347 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17348 ins_cost(150);
17349 ins_encode %{
17350 __ subss($dst$$XMMRegister, $constantaddress($con));
17351 %}
17352 ins_pipe(pipe_slow);
17353 %}
17354
17355 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17356 predicate(UseAVX > 0);
17357 match(Set dst (SubF src1 src2));
17358
17359 format %{ "vsubss $dst, $src1, $src2" %}
17360 ins_cost(150);
17361 ins_encode %{
17362 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17363 %}
17364 ins_pipe(pipe_slow);
17365 %}
17366
17367 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17368 predicate(UseAVX > 0);
17369 match(Set dst (SubF src1 (LoadF src2)));
17370
17371 format %{ "vsubss $dst, $src1, $src2" %}
17372 ins_cost(150);
17373 ins_encode %{
17374 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17375 %}
17376 ins_pipe(pipe_slow);
17377 %}
17378
17379 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17380 predicate(UseAVX > 0);
17381 match(Set dst (SubF src con));
17382
17383 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17384 ins_cost(150);
17385 ins_encode %{
17386 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17387 %}
17388 ins_pipe(pipe_slow);
17389 %}
17390
17391 instruct subD_reg(regD dst, regD src) %{
17392 predicate(UseAVX == 0);
17393 match(Set dst (SubD dst src));
17394
17395 format %{ "subsd $dst, $src" %}
17396 ins_cost(150);
17397 ins_encode %{
17398 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17399 %}
17400 ins_pipe(pipe_slow);
17401 %}
17402
17403 instruct subD_mem(regD dst, memory src) %{
17404 predicate(UseAVX == 0);
17405 match(Set dst (SubD dst (LoadD src)));
17406
17407 format %{ "subsd $dst, $src" %}
17408 ins_cost(150);
17409 ins_encode %{
17410 __ subsd($dst$$XMMRegister, $src$$Address);
17411 %}
17412 ins_pipe(pipe_slow);
17413 %}
17414
17415 instruct subD_imm(regD dst, immD con) %{
17416 predicate(UseAVX == 0);
17417 match(Set dst (SubD dst con));
17418 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17419 ins_cost(150);
17420 ins_encode %{
17421 __ subsd($dst$$XMMRegister, $constantaddress($con));
17422 %}
17423 ins_pipe(pipe_slow);
17424 %}
17425
17426 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17427 predicate(UseAVX > 0);
17428 match(Set dst (SubD src1 src2));
17429
17430 format %{ "vsubsd $dst, $src1, $src2" %}
17431 ins_cost(150);
17432 ins_encode %{
17433 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17434 %}
17435 ins_pipe(pipe_slow);
17436 %}
17437
17438 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17439 predicate(UseAVX > 0);
17440 match(Set dst (SubD src1 (LoadD src2)));
17441
17442 format %{ "vsubsd $dst, $src1, $src2" %}
17443 ins_cost(150);
17444 ins_encode %{
17445 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17446 %}
17447 ins_pipe(pipe_slow);
17448 %}
17449
17450 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17451 predicate(UseAVX > 0);
17452 match(Set dst (SubD src con));
17453
17454 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17455 ins_cost(150);
17456 ins_encode %{
17457 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17458 %}
17459 ins_pipe(pipe_slow);
17460 %}
17461
17462 instruct mulF_reg(regF dst, regF src) %{
17463 predicate(UseAVX == 0);
17464 match(Set dst (MulF dst src));
17465
17466 format %{ "mulss $dst, $src" %}
17467 ins_cost(150);
17468 ins_encode %{
17469 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17470 %}
17471 ins_pipe(pipe_slow);
17472 %}
17473
17474 instruct mulF_mem(regF dst, memory src) %{
17475 predicate(UseAVX == 0);
17476 match(Set dst (MulF dst (LoadF src)));
17477
17478 format %{ "mulss $dst, $src" %}
17479 ins_cost(150);
17480 ins_encode %{
17481 __ mulss($dst$$XMMRegister, $src$$Address);
17482 %}
17483 ins_pipe(pipe_slow);
17484 %}
17485
17486 instruct mulF_imm(regF dst, immF con) %{
17487 predicate(UseAVX == 0);
17488 match(Set dst (MulF dst con));
17489 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17490 ins_cost(150);
17491 ins_encode %{
17492 __ mulss($dst$$XMMRegister, $constantaddress($con));
17493 %}
17494 ins_pipe(pipe_slow);
17495 %}
17496
17497 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17498 predicate(UseAVX > 0);
17499 match(Set dst (MulF src1 src2));
17500
17501 format %{ "vmulss $dst, $src1, $src2" %}
17502 ins_cost(150);
17503 ins_encode %{
17504 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17505 %}
17506 ins_pipe(pipe_slow);
17507 %}
17508
17509 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17510 predicate(UseAVX > 0);
17511 match(Set dst (MulF src1 (LoadF src2)));
17512
17513 format %{ "vmulss $dst, $src1, $src2" %}
17514 ins_cost(150);
17515 ins_encode %{
17516 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17517 %}
17518 ins_pipe(pipe_slow);
17519 %}
17520
17521 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17522 predicate(UseAVX > 0);
17523 match(Set dst (MulF src con));
17524
17525 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17526 ins_cost(150);
17527 ins_encode %{
17528 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17529 %}
17530 ins_pipe(pipe_slow);
17531 %}
17532
17533 instruct mulD_reg(regD dst, regD src) %{
17534 predicate(UseAVX == 0);
17535 match(Set dst (MulD dst src));
17536
17537 format %{ "mulsd $dst, $src" %}
17538 ins_cost(150);
17539 ins_encode %{
17540 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17541 %}
17542 ins_pipe(pipe_slow);
17543 %}
17544
17545 instruct mulD_mem(regD dst, memory src) %{
17546 predicate(UseAVX == 0);
17547 match(Set dst (MulD dst (LoadD src)));
17548
17549 format %{ "mulsd $dst, $src" %}
17550 ins_cost(150);
17551 ins_encode %{
17552 __ mulsd($dst$$XMMRegister, $src$$Address);
17553 %}
17554 ins_pipe(pipe_slow);
17555 %}
17556
17557 instruct mulD_imm(regD dst, immD con) %{
17558 predicate(UseAVX == 0);
17559 match(Set dst (MulD dst con));
17560 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17561 ins_cost(150);
17562 ins_encode %{
17563 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17564 %}
17565 ins_pipe(pipe_slow);
17566 %}
17567
17568 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17569 predicate(UseAVX > 0);
17570 match(Set dst (MulD src1 src2));
17571
17572 format %{ "vmulsd $dst, $src1, $src2" %}
17573 ins_cost(150);
17574 ins_encode %{
17575 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17576 %}
17577 ins_pipe(pipe_slow);
17578 %}
17579
17580 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17581 predicate(UseAVX > 0);
17582 match(Set dst (MulD src1 (LoadD src2)));
17583
17584 format %{ "vmulsd $dst, $src1, $src2" %}
17585 ins_cost(150);
17586 ins_encode %{
17587 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17588 %}
17589 ins_pipe(pipe_slow);
17590 %}
17591
17592 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17593 predicate(UseAVX > 0);
17594 match(Set dst (MulD src con));
17595
17596 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17597 ins_cost(150);
17598 ins_encode %{
17599 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17600 %}
17601 ins_pipe(pipe_slow);
17602 %}
17603
17604 instruct divF_reg(regF dst, regF src) %{
17605 predicate(UseAVX == 0);
17606 match(Set dst (DivF dst src));
17607
17608 format %{ "divss $dst, $src" %}
17609 ins_cost(150);
17610 ins_encode %{
17611 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17612 %}
17613 ins_pipe(pipe_slow);
17614 %}
17615
17616 instruct divF_mem(regF dst, memory src) %{
17617 predicate(UseAVX == 0);
17618 match(Set dst (DivF dst (LoadF src)));
17619
17620 format %{ "divss $dst, $src" %}
17621 ins_cost(150);
17622 ins_encode %{
17623 __ divss($dst$$XMMRegister, $src$$Address);
17624 %}
17625 ins_pipe(pipe_slow);
17626 %}
17627
17628 instruct divF_imm(regF dst, immF con) %{
17629 predicate(UseAVX == 0);
17630 match(Set dst (DivF dst con));
17631 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17632 ins_cost(150);
17633 ins_encode %{
17634 __ divss($dst$$XMMRegister, $constantaddress($con));
17635 %}
17636 ins_pipe(pipe_slow);
17637 %}
17638
17639 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17640 predicate(UseAVX > 0);
17641 match(Set dst (DivF src1 src2));
17642
17643 format %{ "vdivss $dst, $src1, $src2" %}
17644 ins_cost(150);
17645 ins_encode %{
17646 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17647 %}
17648 ins_pipe(pipe_slow);
17649 %}
17650
17651 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17652 predicate(UseAVX > 0);
17653 match(Set dst (DivF src1 (LoadF src2)));
17654
17655 format %{ "vdivss $dst, $src1, $src2" %}
17656 ins_cost(150);
17657 ins_encode %{
17658 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17659 %}
17660 ins_pipe(pipe_slow);
17661 %}
17662
17663 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17664 predicate(UseAVX > 0);
17665 match(Set dst (DivF src con));
17666
17667 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17668 ins_cost(150);
17669 ins_encode %{
17670 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17671 %}
17672 ins_pipe(pipe_slow);
17673 %}
17674
17675 instruct divD_reg(regD dst, regD src) %{
17676 predicate(UseAVX == 0);
17677 match(Set dst (DivD dst src));
17678
17679 format %{ "divsd $dst, $src" %}
17680 ins_cost(150);
17681 ins_encode %{
17682 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17683 %}
17684 ins_pipe(pipe_slow);
17685 %}
17686
17687 instruct divD_mem(regD dst, memory src) %{
17688 predicate(UseAVX == 0);
17689 match(Set dst (DivD dst (LoadD src)));
17690
17691 format %{ "divsd $dst, $src" %}
17692 ins_cost(150);
17693 ins_encode %{
17694 __ divsd($dst$$XMMRegister, $src$$Address);
17695 %}
17696 ins_pipe(pipe_slow);
17697 %}
17698
17699 instruct divD_imm(regD dst, immD con) %{
17700 predicate(UseAVX == 0);
17701 match(Set dst (DivD dst con));
17702 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17703 ins_cost(150);
17704 ins_encode %{
17705 __ divsd($dst$$XMMRegister, $constantaddress($con));
17706 %}
17707 ins_pipe(pipe_slow);
17708 %}
17709
17710 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17711 predicate(UseAVX > 0);
17712 match(Set dst (DivD src1 src2));
17713
17714 format %{ "vdivsd $dst, $src1, $src2" %}
17715 ins_cost(150);
17716 ins_encode %{
17717 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17718 %}
17719 ins_pipe(pipe_slow);
17720 %}
17721
17722 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17723 predicate(UseAVX > 0);
17724 match(Set dst (DivD src1 (LoadD src2)));
17725
17726 format %{ "vdivsd $dst, $src1, $src2" %}
17727 ins_cost(150);
17728 ins_encode %{
17729 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17730 %}
17731 ins_pipe(pipe_slow);
17732 %}
17733
17734 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17735 predicate(UseAVX > 0);
17736 match(Set dst (DivD src con));
17737
17738 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17739 ins_cost(150);
17740 ins_encode %{
17741 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17742 %}
17743 ins_pipe(pipe_slow);
17744 %}
17745
17746 instruct absF_reg(regF dst) %{
17747 predicate(UseAVX == 0);
17748 match(Set dst (AbsF dst));
17749 ins_cost(150);
17750 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17751 ins_encode %{
17752 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17753 %}
17754 ins_pipe(pipe_slow);
17755 %}
17756
17757 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17758 predicate(UseAVX > 0);
17759 match(Set dst (AbsF src));
17760 ins_cost(150);
17761 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17762 ins_encode %{
17763 int vlen_enc = Assembler::AVX_128bit;
17764 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17765 ExternalAddress(float_signmask()), vlen_enc);
17766 %}
17767 ins_pipe(pipe_slow);
17768 %}
17769
17770 instruct absD_reg(regD dst) %{
17771 predicate(UseAVX == 0);
17772 match(Set dst (AbsD dst));
17773 ins_cost(150);
17774 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17775 "# abs double by sign masking" %}
17776 ins_encode %{
17777 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17778 %}
17779 ins_pipe(pipe_slow);
17780 %}
17781
17782 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17783 predicate(UseAVX > 0);
17784 match(Set dst (AbsD src));
17785 ins_cost(150);
17786 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17787 "# abs double by sign masking" %}
17788 ins_encode %{
17789 int vlen_enc = Assembler::AVX_128bit;
17790 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17791 ExternalAddress(double_signmask()), vlen_enc);
17792 %}
17793 ins_pipe(pipe_slow);
17794 %}
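
// A note on the sign masking above: clearing only the sign bit yields the
// absolute value without branches and without touching NaN payloads. A rough
// Java-level sketch of the same bit trick (illustrative only, not the code
// C2 compiles here):
//
//   float absF(float x) {
//     return Float.intBitsToFloat(Float.floatToRawIntBits(x) & 0x7fffffff);
//   }
//   double absD(double x) {
//     return Double.longBitsToDouble(Double.doubleToRawLongBits(x) & 0x7fffffffffffffffL);
//   }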
17795
17796 instruct negF_reg(regF dst) %{
17797 predicate(UseAVX == 0);
17798 match(Set dst (NegF dst));
17799 ins_cost(150);
17800 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17801 ins_encode %{
17802 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17803 %}
17804 ins_pipe(pipe_slow);
17805 %}
17806
17807 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17808 predicate(UseAVX > 0);
17809 match(Set dst (NegF src));
17810 ins_cost(150);
17811 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17812 ins_encode %{
17813 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17814 ExternalAddress(float_signflip()));
17815 %}
17816 ins_pipe(pipe_slow);
17817 %}
17818
17819 instruct negD_reg(regD dst) %{
17820 predicate(UseAVX == 0);
17821 match(Set dst (NegD dst));
17822 ins_cost(150);
17823 format %{ "xorpd $dst, [0x8000000000000000]\t"
17824 "# neg double by sign flipping" %}
17825 ins_encode %{
17826 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17827 %}
17828 ins_pipe(pipe_slow);
17829 %}
17830
17831 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17832 predicate(UseAVX > 0);
17833 match(Set dst (NegD src));
17834 ins_cost(150);
17835 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17836 "# neg double by sign flipping" %}
17837 ins_encode %{
17838 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17839 ExternalAddress(double_signflip()));
17840 %}
17841 ins_pipe(pipe_slow);
17842 %}
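
// Likewise, the negation rules above only flip the sign bit. A Java-level
// sketch of the equivalent bit operation (illustrative only):
//
//   float negF(float x) {
//     return Float.intBitsToFloat(Float.floatToRawIntBits(x) ^ 0x80000000);
//   }
//   double negD(double x) {
//     return Double.longBitsToDouble(Double.doubleToRawLongBits(x) ^ 0x8000000000000000L);
//   }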
17843
// The sqrtss instruction needs its destination register to be pre-initialized for best
// performance: sqrtss writes only the low element of dst, so a cold dst adds a false
// (merge) dependency on its stale upper bits. Therefore only the instruct rule where
// the input is pre-loaded into the dst register is defined below.
17846 instruct sqrtF_reg(regF dst) %{
17847 match(Set dst (SqrtF dst));
17848 format %{ "sqrtss $dst, $dst" %}
17849 ins_encode %{
17850 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17851 %}
17852 ins_pipe(pipe_slow);
17853 %}
17854
// The sqrtsd instruction needs its destination register to be pre-initialized for best
// performance, for the same false-dependency reason as sqrtss above. Therefore only the
// instruct rule where the input is pre-loaded into the dst register is defined below.
17857 instruct sqrtD_reg(regD dst) %{
17858 match(Set dst (SqrtD dst));
17859 format %{ "sqrtsd $dst, $dst" %}
17860 ins_encode %{
17861 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17862 %}
17863 ins_pipe(pipe_slow);
17864 %}
17865
17866 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17867 effect(TEMP tmp);
17868 match(Set dst (ConvF2HF src));
17869 ins_cost(125);
format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17871 ins_encode %{
17872 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17873 %}
17874 ins_pipe( pipe_slow );
17875 %}
17876
17877 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17878 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17879 effect(TEMP ktmp, TEMP rtmp);
17880 match(Set mem (StoreC mem (ConvF2HF src)));
format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17882 ins_encode %{
17883 __ movl($rtmp$$Register, 0x1);
17884 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17885 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17886 %}
17887 ins_pipe( pipe_slow );
17888 %}
17889
17890 instruct vconvF2HF(vec dst, vec src) %{
17891 match(Set dst (VectorCastF2HF src));
17892 format %{ "vector_conv_F2HF $dst $src" %}
17893 ins_encode %{
17894 int vlen_enc = vector_length_encoding(this, $src);
17895 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17896 %}
17897 ins_pipe( pipe_slow );
17898 %}
17899
17900 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17901 predicate(n->as_StoreVector()->memory_size() >= 16);
17902 match(Set mem (StoreVector mem (VectorCastF2HF src)));
17903 format %{ "vcvtps2ph $mem,$src" %}
17904 ins_encode %{
17905 int vlen_enc = vector_length_encoding(this, $src);
17906 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17907 %}
17908 ins_pipe( pipe_slow );
17909 %}
17910
17911 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17912 match(Set dst (ConvHF2F src));
17913 format %{ "vcvtph2ps $dst,$src" %}
17914 ins_encode %{
17915 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17916 %}
17917 ins_pipe( pipe_slow );
17918 %}
17919
17920 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17921 match(Set dst (VectorCastHF2F (LoadVector mem)));
17922 format %{ "vcvtph2ps $dst,$mem" %}
17923 ins_encode %{
17924 int vlen_enc = vector_length_encoding(this);
17925 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17926 %}
17927 ins_pipe( pipe_slow );
17928 %}
17929
17930 instruct vconvHF2F(vec dst, vec src) %{
17931 match(Set dst (VectorCastHF2F src));
17932 ins_cost(125);
17933 format %{ "vector_conv_HF2F $dst,$src" %}
17934 ins_encode %{
17935 int vlen_enc = vector_length_encoding(this);
17936 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17937 %}
17938 ins_pipe( pipe_slow );
17939 %}
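
// The ConvF2HF/ConvHF2F rules above back the java.lang.Float float16
// conversion intrinsics. A scalar Java usage sketch (assuming a JDK where
// these methods exist, i.e. JDK 20+):
//
//   short h = Float.floatToFloat16(1.5f);  // matched as ConvF2HF
//   float f = Float.float16ToFloat(h);     // matched as ConvHF2F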
17940
17941 // ---------------------------------------- VectorReinterpret ------------------------------------
17942 instruct reinterpret_mask(kReg dst) %{
17943 predicate(n->bottom_type()->isa_vectmask() &&
17944 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17945 match(Set dst (VectorReinterpret dst));
17946 ins_cost(125);
17947 format %{ "vector_reinterpret $dst\t!" %}
17948 ins_encode %{
17949 // empty
17950 %}
17951 ins_pipe( pipe_slow );
17952 %}
17953
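// The W2B/D2B/Q2B rules below reinterpret an opmask between element sizes by
// round-tripping through a vector register: evpmovm2{w,d,q} expands each mask
// bit into an all-ones element, and evpmovb2m then re-reads the same bytes at
// byte granularity, fanning each source bit out over the corresponding byte
// lanes. For example, a 0b10 mask over two shorts becomes 0b1100 over the
// same four bytes.
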
17954 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17955 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17956 n->bottom_type()->isa_vectmask() &&
17957 n->in(1)->bottom_type()->isa_vectmask() &&
17958 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
17959 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
17960 match(Set dst (VectorReinterpret src));
17961 effect(TEMP xtmp);
17962 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17963 ins_encode %{
17964 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17965 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
17967 int vlen_enc = vector_length_encoding(src_sz);
17968 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17969 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17970 %}
17971 ins_pipe( pipe_slow );
17972 %}
17973
17974 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
17975 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17976 n->bottom_type()->isa_vectmask() &&
17977 n->in(1)->bottom_type()->isa_vectmask() &&
17978 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
17979 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
17980 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
17981 match(Set dst (VectorReinterpret src));
17982 effect(TEMP xtmp);
17983 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
17984 ins_encode %{
17985 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
17986 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
17988 int vlen_enc = vector_length_encoding(src_sz);
17989 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17990 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17991 %}
17992 ins_pipe( pipe_slow );
17993 %}
17994
17995 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
17996 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17997 n->bottom_type()->isa_vectmask() &&
17998 n->in(1)->bottom_type()->isa_vectmask() &&
17999 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18000 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18001 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18002 match(Set dst (VectorReinterpret src));
18003 effect(TEMP xtmp);
18004 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18005 ins_encode %{
18006 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18007 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
assert(src_sz == dst_sz, "src and dst size mismatch");
18009 int vlen_enc = vector_length_encoding(src_sz);
18010 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18011 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18012 %}
18013 ins_pipe( pipe_slow );
18014 %}
18015
18016 instruct reinterpret(vec dst) %{
18017 predicate(!n->bottom_type()->isa_vectmask() &&
18018 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18019 match(Set dst (VectorReinterpret dst));
18020 ins_cost(125);
18021 format %{ "vector_reinterpret $dst\t!" %}
18022 ins_encode %{
18023 // empty
18024 %}
18025 ins_pipe( pipe_slow );
18026 %}
18027
18028 instruct reinterpret_expand(vec dst, vec src) %{
18029 predicate(UseAVX == 0 &&
18030 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18031 match(Set dst (VectorReinterpret src));
18032 ins_cost(125);
18033 effect(TEMP dst);
18034 format %{ "vector_reinterpret_expand $dst,$src" %}
18035 ins_encode %{
18036 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18037 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18038
18039 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18040 if (src_vlen_in_bytes == 4) {
18041 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18042 } else {
18043 assert(src_vlen_in_bytes == 8, "");
18044 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18045 }
18046 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18047 %}
18048 ins_pipe( pipe_slow );
18049 %}
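
// Note on reinterpret_expand above: the loaded constant mask presumably keeps
// only the low 4 or 8 bytes set, so the pand zeroes everything past the source
// length and the widened vector reads as the original value zero-extended.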
18050
18051 instruct vreinterpret_expand4(legVec dst, vec src) %{
18052 predicate(UseAVX > 0 &&
18053 !n->bottom_type()->isa_vectmask() &&
18054 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18055 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18056 match(Set dst (VectorReinterpret src));
18057 ins_cost(125);
18058 format %{ "vector_reinterpret_expand $dst,$src" %}
18059 ins_encode %{
18060 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18061 %}
18062 ins_pipe( pipe_slow );
18063 %}
18064
18065
18066 instruct vreinterpret_expand(legVec dst, vec src) %{
18067 predicate(UseAVX > 0 &&
18068 !n->bottom_type()->isa_vectmask() &&
18069 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18070 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18071 match(Set dst (VectorReinterpret src));
18072 ins_cost(125);
18073 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18074 ins_encode %{
18075 switch (Matcher::vector_length_in_bytes(this, $src)) {
18076 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18077 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18078 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18079 default: ShouldNotReachHere();
18080 }
18081 %}
18082 ins_pipe( pipe_slow );
18083 %}
18084
18085 instruct reinterpret_shrink(vec dst, legVec src) %{
18086 predicate(!n->bottom_type()->isa_vectmask() &&
18087 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18088 match(Set dst (VectorReinterpret src));
18089 ins_cost(125);
18090 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18091 ins_encode %{
18092 switch (Matcher::vector_length_in_bytes(this)) {
18093 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18094 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18095 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18096 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18097 default: ShouldNotReachHere();
18098 }
18099 %}
18100 ins_pipe( pipe_slow );
18101 %}
18102
18103 // ----------------------------------------------------------------------------------------------------
18104
18105 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18106 match(Set dst (RoundDoubleMode src rmode));
18107 format %{ "roundsd $dst,$src" %}
18108 ins_cost(150);
18109 ins_encode %{
18110 assert(UseSSE >= 4, "required");
18111 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18112 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18113 }
18114 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18115 %}
18116 ins_pipe(pipe_slow);
18117 %}
18118
18119 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18120 match(Set dst (RoundDoubleMode con rmode));
18121 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18122 ins_cost(150);
18123 ins_encode %{
18124 assert(UseSSE >= 4, "required");
18125 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18126 %}
18127 ins_pipe(pipe_slow);
18128 %}
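
// RoundDoubleMode carries the rounding mode as an immediate and backs the
// Math.rint/floor/ceil intrinsics (mode-to-immediate mapping omitted here;
// see the RoundDoubleModeNode definition). The pxor in roundD_reg above is a
// dependency-breaking idiom: roundsd writes only the low half of dst, so
// clearing dst first avoids a false dependency on its stale contents.
// Java usage sketch:
//
//   double r = Math.rint(x);   // round to nearest even
//   double f = Math.floor(x);  // round toward negative infinity
//   double c = Math.ceil(x);   // round toward positive infinity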
18129
18130 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18131 predicate(Matcher::vector_length(n) < 8);
18132 match(Set dst (RoundDoubleModeV src rmode));
18133 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18134 ins_encode %{
18135 assert(UseAVX > 0, "required");
18136 int vlen_enc = vector_length_encoding(this);
18137 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18138 %}
18139 ins_pipe( pipe_slow );
18140 %}
18141
18142 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18143 predicate(Matcher::vector_length(n) == 8);
18144 match(Set dst (RoundDoubleModeV src rmode));
18145 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18146 ins_encode %{
18147 assert(UseAVX > 2, "required");
18148 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18149 %}
18150 ins_pipe( pipe_slow );
18151 %}
18152
18153 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18154 predicate(Matcher::vector_length(n) < 8);
18155 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18156 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18157 ins_encode %{
18158 assert(UseAVX > 0, "required");
18159 int vlen_enc = vector_length_encoding(this);
18160 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18161 %}
18162 ins_pipe( pipe_slow );
18163 %}
18164
18165 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18166 predicate(Matcher::vector_length(n) == 8);
18167 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18168 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18169 ins_encode %{
18170 assert(UseAVX > 2, "required");
18171 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18172 %}
18173 ins_pipe( pipe_slow );
18174 %}
18175
18176 instruct onspinwait() %{
18177 match(OnSpinWait);
18178 ins_cost(200);
18179
18180 format %{
18181 $$template
18182 $$emit$$"pause\t! membar_onspinwait"
18183 %}
18184 ins_encode %{
18185 __ pause();
18186 %}
18187 ins_pipe(pipe_slow);
18188 %}
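
// OnSpinWait backs the Thread.onSpinWait() intrinsic; pause hints to the core
// that it is in a spin-wait loop. Typical Java usage:
//
//   while (!flag) {
//     Thread.onSpinWait();
//   }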
18189
18190 // a * b + c
18191 instruct fmaD_reg(regD a, regD b, regD c) %{
18192 match(Set c (FmaD c (Binary a b)));
18193 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18194 ins_cost(150);
18195 ins_encode %{
18196 assert(UseFMA, "Needs FMA instructions support.");
18197 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18198 %}
18199 ins_pipe( pipe_slow );
18200 %}
18201
18202 // a * b + c
18203 instruct fmaF_reg(regF a, regF b, regF c) %{
18204 match(Set c (FmaF c (Binary a b)));
18205 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18206 ins_cost(150);
18207 ins_encode %{
18208 assert(UseFMA, "Needs FMA instructions support.");
18209 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18210 %}
18211 ins_pipe( pipe_slow );
18212 %}
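
// FmaD/FmaF back the Math.fma intrinsics; note that the match rules write the
// result back into the c input, mirroring the destructive fused-multiply-add
// form. Java usage sketch:
//
//   double d = Math.fma(a, b, c);  // a * b + c with a single rounding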
18213
18214 // ====================VECTOR INSTRUCTIONS=====================================
18215
18216 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18217 instruct MoveVec2Leg(legVec dst, vec src) %{
18218 match(Set dst src);
18219 format %{ "" %}
18220 ins_encode %{
18221 ShouldNotReachHere();
18222 %}
18223 ins_pipe( fpu_reg_reg );
18224 %}
18225
18226 instruct MoveLeg2Vec(vec dst, legVec src) %{
18227 match(Set dst src);
18228 format %{ "" %}
18229 ins_encode %{
18230 ShouldNotReachHere();
18231 %}
18232 ins_pipe( fpu_reg_reg );
18233 %}
18234
18235 // ============================================================================
18236
18237 // Load vectors generic operand pattern
18238 instruct loadV(vec dst, memory mem) %{
18239 match(Set dst (LoadVector mem));
18240 ins_cost(125);
18241 format %{ "load_vector $dst,$mem" %}
18242 ins_encode %{
18243 BasicType bt = Matcher::vector_element_basic_type(this);
18244 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18245 %}
18246 ins_pipe( pipe_slow );
18247 %}
18248
18249 // Store vectors generic operand pattern.
18250 instruct storeV(memory mem, vec src) %{
18251 match(Set mem (StoreVector mem src));
18252 ins_cost(145);
format %{ "store_vector $mem,$src" %}
18254 ins_encode %{
18255 switch (Matcher::vector_length_in_bytes(this, $src)) {
18256 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18257 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18258 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18259 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18260 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18261 default: ShouldNotReachHere();
18262 }
18263 %}
18264 ins_pipe( pipe_slow );
18265 %}
18266
18267 // ---------------------------------------- Gather ------------------------------------
18268
18269 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18270
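// The gather rules below back the Vector API's indexed loads
// (LoadVectorGather). A hedged Java sketch using jdk.incubator.vector
// (species and array names are illustrative):
//
//   int[] data = new int[1024];
//   int[] indexMap = {3, 1, 4, 1, 5, 9, 2, 6};
//   IntVector v = IntVector.fromArray(IntVector.SPECIES_256, data, 0, indexMap, 0);
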
18271 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18272 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18273 Matcher::vector_length_in_bytes(n) <= 32);
18274 match(Set dst (LoadVectorGather mem idx));
18275 effect(TEMP dst, TEMP tmp, TEMP mask);
18276 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18277 ins_encode %{
18278 int vlen_enc = vector_length_encoding(this);
18279 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18280 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18281 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18282 __ lea($tmp$$Register, $mem$$Address);
18283 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18284 %}
18285 ins_pipe( pipe_slow );
18286 %}
18287
18288
18289 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18290 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18291 !is_subword_type(Matcher::vector_element_basic_type(n)));
18292 match(Set dst (LoadVectorGather mem idx));
18293 effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18295 ins_encode %{
18296 int vlen_enc = vector_length_encoding(this);
18297 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18298 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18299 __ lea($tmp$$Register, $mem$$Address);
18300 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18301 %}
18302 ins_pipe( pipe_slow );
18303 %}
18304
18305 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18306 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18307 !is_subword_type(Matcher::vector_element_basic_type(n)));
18308 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18309 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18311 ins_encode %{
18312 assert(UseAVX > 2, "sanity");
18313 int vlen_enc = vector_length_encoding(this);
18314 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18315 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: Since the gather instruction partially updates the opmask register used
// for predication, the mask operand is moved to a temporary first.
18318 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18319 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18320 __ lea($tmp$$Register, $mem$$Address);
18321 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18322 %}
18323 ins_pipe( pipe_slow );
18324 %}
18325
18326 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18327 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18328 match(Set dst (LoadVectorGather mem idx_base));
18329 effect(TEMP tmp, TEMP rtmp);
18330 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18331 ins_encode %{
18332 int vlen_enc = vector_length_encoding(this);
18333 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18334 __ lea($tmp$$Register, $mem$$Address);
18335 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18336 %}
18337 ins_pipe( pipe_slow );
18338 %}
18339
18340 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18341 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18342 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18343 match(Set dst (LoadVectorGather mem idx_base));
18344 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18345 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18346 ins_encode %{
18347 int vlen_enc = vector_length_encoding(this);
18348 int vector_len = Matcher::vector_length(this);
18349 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18350 __ lea($tmp$$Register, $mem$$Address);
18351 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18352 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18353 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18354 %}
18355 ins_pipe( pipe_slow );
18356 %}
18357
18358 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18359 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18360 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18361 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18362 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18363 ins_encode %{
18364 int vlen_enc = vector_length_encoding(this);
18365 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18366 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18367 __ lea($tmp$$Register, $mem$$Address);
18368 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18369 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18370 %}
18371 ins_pipe( pipe_slow );
18372 %}
18373
18374 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18375 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18376 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18377 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18378 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18379 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18380 ins_encode %{
18381 int vlen_enc = vector_length_encoding(this);
18382 int vector_len = Matcher::vector_length(this);
18383 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18384 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18385 __ lea($tmp$$Register, $mem$$Address);
18386 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18387 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18388 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18389 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18390 %}
18391 ins_pipe( pipe_slow );
18392 %}
18393
18394 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18395 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18396 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18397 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18398 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18399 ins_encode %{
18400 int vlen_enc = vector_length_encoding(this);
18401 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18402 __ lea($tmp$$Register, $mem$$Address);
18403 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18404 if (elem_bt == T_SHORT) {
18405 __ movl($mask_idx$$Register, 0x55555555);
18406 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18407 }
18408 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18409 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18410 %}
18411 ins_pipe( pipe_slow );
18412 %}
18413
18414 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18415 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18416 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18417 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18418 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18419 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18420 ins_encode %{
18421 int vlen_enc = vector_length_encoding(this);
18422 int vector_len = Matcher::vector_length(this);
18423 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18424 __ lea($tmp$$Register, $mem$$Address);
18425 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18426 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18427 if (elem_bt == T_SHORT) {
18428 __ movl($mask_idx$$Register, 0x55555555);
18429 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18430 }
18431 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18432 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18433 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18434 %}
18435 ins_pipe( pipe_slow );
18436 %}
18437
18438 // ====================Scatter=======================================
18439
18440 // Scatter INT, LONG, FLOAT, DOUBLE
18441
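// The scatter rules below back the Vector API's indexed stores
// (StoreVectorScatter). Continuing the gather sketch above (illustrative):
//
//   v.intoArray(data, 0, indexMap, 0);  // data[indexMap[i]] = v.lane(i)
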
18442 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18443 predicate(UseAVX > 2);
18444 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18445 effect(TEMP tmp, TEMP ktmp);
format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18447 ins_encode %{
18448 int vlen_enc = vector_length_encoding(this, $src);
18449 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18450
18451 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18452 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18453
18454 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18455 __ lea($tmp$$Register, $mem$$Address);
18456 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18457 %}
18458 ins_pipe( pipe_slow );
18459 %}
18460
18461 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18462 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18463 effect(TEMP tmp, TEMP ktmp);
18464 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18465 ins_encode %{
18466 int vlen_enc = vector_length_encoding(this, $src);
18467 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18468 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18469 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: Since the scatter instruction partially updates the opmask register used
// for predication, the mask operand is moved to a temporary first.
18472 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18473 __ lea($tmp$$Register, $mem$$Address);
18474 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18475 %}
18476 ins_pipe( pipe_slow );
18477 %}
18478
18479 // ====================REPLICATE=======================================
18480
// Replicate byte scalar into a vector
18482 instruct vReplB_reg(vec dst, rRegI src) %{
18483 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18484 match(Set dst (Replicate src));
18485 format %{ "replicateB $dst,$src" %}
18486 ins_encode %{
18487 uint vlen = Matcher::vector_length(this);
18488 if (UseAVX >= 2) {
18489 int vlen_enc = vector_length_encoding(this);
18490 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18491 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18492 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18493 } else {
18494 __ movdl($dst$$XMMRegister, $src$$Register);
18495 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18496 }
18497 } else {
18498 assert(UseAVX < 2, "");
18499 __ movdl($dst$$XMMRegister, $src$$Register);
18500 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18501 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18502 if (vlen >= 16) {
18503 assert(vlen == 16, "");
18504 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18505 }
18506 }
18507 %}
18508 ins_pipe( pipe_slow );
18509 %}
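
// Replicate corresponds to the Vector API's broadcast operation, e.g.
// (illustrative):
//
//   ByteVector bv = ByteVector.broadcast(ByteVector.SPECIES_128, (byte) 42);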
18510
18511 instruct ReplB_mem(vec dst, memory mem) %{
18512 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18513 match(Set dst (Replicate (LoadB mem)));
18514 format %{ "replicateB $dst,$mem" %}
18515 ins_encode %{
18516 int vlen_enc = vector_length_encoding(this);
18517 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18518 %}
18519 ins_pipe( pipe_slow );
18520 %}
18521
18522 // ====================ReplicateS=======================================
18523
18524 instruct vReplS_reg(vec dst, rRegI src) %{
18525 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18526 match(Set dst (Replicate src));
18527 format %{ "replicateS $dst,$src" %}
18528 ins_encode %{
18529 uint vlen = Matcher::vector_length(this);
18530 int vlen_enc = vector_length_encoding(this);
18531 if (UseAVX >= 2) {
18532 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18533 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18534 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18535 } else {
18536 __ movdl($dst$$XMMRegister, $src$$Register);
18537 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18538 }
18539 } else {
18540 assert(UseAVX < 2, "");
18541 __ movdl($dst$$XMMRegister, $src$$Register);
18542 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18543 if (vlen >= 8) {
18544 assert(vlen == 8, "");
18545 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18546 }
18547 }
18548 %}
18549 ins_pipe( pipe_slow );
18550 %}

instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateI=======================================

instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                            type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
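
// Note: the replication count passed to vreplicate_imm above is sized to the
// cheapest load the target can widen from: 4 bytes of pattern when AVX can
// broadcast a dword, 8 bytes when SSE3 movddup can duplicate a qword, and a
// full 16 bytes otherwise, divided by the element size. load_constant_vector
// is then presumed to broadcast the constant-table pattern out to the actual
// vector length.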

// Replicate scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
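
// Note: without AVX512VL the 128- and 256-bit EVEX forms are unavailable, so on
// such targets (e.g. Knights Landing) the zeroing goes through the vlen-encoded
// vpxor rather than the pxor idiom. The same pattern recurs in the
// ReplL/ReplF/ReplD zero rules below.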

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
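
// Note: vallones (see C2_MacroAssembler) is presumed to materialize the
// all-ones pattern without a constant-table load, e.g. via a
// compare-equal-to-self idiom or vpternlogd with an 0xFF truth table on
// AVX512 targets.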

// ====================ReplicateL=======================================

// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
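
// Note: AVX1 only provides the memory form of vbroadcastss; the reg-to-reg form
// used above requires AVX2. The 256-bit AVX1 fallback therefore broadcasts
// within the low 128-bit lane and mirrors it into the high lane with
// vinsertf128_high.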

instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float scalar immediate to be vector by loading from const table.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
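
// Note: C2_MacroAssembler::insert is expected to dispatch on elem_bt to the
// matching pinsrb/pinsrw/pinsrd form, which is why UseSSE >= 4 is asserted
// above.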

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
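
// Example of the lane arithmetic above: inserting element 9 into a 256-bit
// vector of shorts gives elem_per_lane = 16/2 = 8 and log2epr = 3, so
// x_idx = 9 & 7 = 1 and y_idx = (9 >> 3) & 1 = 1: the upper 128-bit lane is
// extracted, the scalar is inserted at in-lane slot 1, and the lane is written
// back into place.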

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
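
// Note: there is no direct XMM-to-XMM insert of a double at an arbitrary lane
// position, so the double rules above bounce $val through a GPR with movq and
// reuse the pinsrq/vpinsrq path instead.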

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
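
// Note: reduceI (see C2_MacroAssembler) is presumed to fold $src2 by repeated
// halving (extract the upper half, combine it with the lower half) until a
// single element remains, then apply the scalar operation against $src1;
// vtmp1/vtmp2 hold the intermediate halves.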

// =======================Long Reduction==========================================

instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Float Reduction==========================================

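// Floating-point add/mul are not associative, so reductions that come from Java
// language constructs must be evaluated in strict order: the rules below keep
// the running value in $dst and are presumed to fold $src into it element by
// element. The unordered_* rules further down relax the evaluation order for
// the Vector API, which permits non-strictly ordered add/mul reductions and
// passes the reduction identity in src1.
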
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
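
// Note: the legVec operand class on reductionB above (and reductionS below)
// restricts allocation to xmm0-xmm15: without AVX512BW, byte/short vector ops
// cannot be EVEX-encoded and so cannot reach the upper register bank. The
// _avx512bw variant is free to use plain vec operands.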

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
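// Java Math.min/max semantics order -0.0 below +0.0 and propagate NaN, so a
// plain minps/maxps is not sufficient here; reduceFloatMinMax is presumed to
// blend per element to honor those rules, which is what the atmp/btmp
// temporaries and the KILL of the flags register in the rules below are for.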
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
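
// Note: AVX10.2 is assumed to provide min/max instructions that implement the
// IEEE-754-2019 (and therefore Java) semantics directly, so the _avx10 rules
// need neither the blend temporaries nor a flags kill; xnoreg fills the unused
// temp slots of the shared reduceFloatMinMax entry point.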

instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
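
// Note: the vector_length_in_bytes > 8 guard on this and the following _mem
// rules keeps load folding away from 8-byte (sub-XMM) vectors: the folded
// memory operand would be read at full 16-byte width, which could touch memory
// past the end of an 8-byte source.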

// Shorts/Chars vector add
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

19874 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19875 predicate((UseAVX > 0) &&
19876 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19877 match(Set dst (AddVI src (LoadVector mem)));
19878 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19879 ins_encode %{
19880 int vlen_enc = vector_length_encoding(this);
19881 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19882 %}
19883 ins_pipe( pipe_slow );
19884 %}
19885
19886 // Longs vector add
19887 instruct vaddL(vec dst, vec src) %{
19888 predicate(UseAVX == 0);
19889 match(Set dst (AddVL dst src));
19890 format %{ "paddq $dst,$src\t! add packedL" %}
19891 ins_encode %{
19892 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19893 %}
19894 ins_pipe( pipe_slow );
19895 %}
19896
19897 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19898 predicate(UseAVX > 0);
19899 match(Set dst (AddVL src1 src2));
19900 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19901 ins_encode %{
19902 int vlen_enc = vector_length_encoding(this);
19903 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19904 %}
19905 ins_pipe( pipe_slow );
19906 %}
19907
19908 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19909 predicate((UseAVX > 0) &&
19910 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19911 match(Set dst (AddVL src (LoadVector mem)));
19912 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19913 ins_encode %{
19914 int vlen_enc = vector_length_encoding(this);
19915 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19916 %}
19917 ins_pipe( pipe_slow );
19918 %}
19919
19920 // Floats vector add
19921 instruct vaddF(vec dst, vec src) %{
19922 predicate(UseAVX == 0);
19923 match(Set dst (AddVF dst src));
19924 format %{ "addps $dst,$src\t! add packedF" %}
19925 ins_encode %{
19926 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19927 %}
19928 ins_pipe( pipe_slow );
19929 %}
19930
19931 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19932 predicate(UseAVX > 0);
19933 match(Set dst (AddVF src1 src2));
19934 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19935 ins_encode %{
19936 int vlen_enc = vector_length_encoding(this);
19937 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19938 %}
19939 ins_pipe( pipe_slow );
19940 %}
19941
19942 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19943 predicate((UseAVX > 0) &&
19944 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19945 match(Set dst (AddVF src (LoadVector mem)));
19946 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
19947 ins_encode %{
19948 int vlen_enc = vector_length_encoding(this);
19949 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19950 %}
19951 ins_pipe( pipe_slow );
19952 %}
19953
19954 // Doubles vector add
19955 instruct vaddD(vec dst, vec src) %{
19956 predicate(UseAVX == 0);
19957 match(Set dst (AddVD dst src));
19958 format %{ "addpd $dst,$src\t! add packedD" %}
19959 ins_encode %{
19960 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19961 %}
19962 ins_pipe( pipe_slow );
19963 %}
19964
19965 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19966 predicate(UseAVX > 0);
19967 match(Set dst (AddVD src1 src2));
19968 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
19969 ins_encode %{
19970 int vlen_enc = vector_length_encoding(this);
19971 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19972 %}
19973 ins_pipe( pipe_slow );
19974 %}
19975
19976 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19977 predicate((UseAVX > 0) &&
19978 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19979 match(Set dst (AddVD src (LoadVector mem)));
19980 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
19981 ins_encode %{
19982 int vlen_enc = vector_length_encoding(this);
19983 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19984 %}
19985 ins_pipe( pipe_slow );
19986 %}
19987
19988 // --------------------------------- SUB --------------------------------------
19989
19990 // Bytes vector sub
19991 instruct vsubB(vec dst, vec src) %{
19992 predicate(UseAVX == 0);
19993 match(Set dst (SubVB dst src));
19994 format %{ "psubb $dst,$src\t! sub packedB" %}
19995 ins_encode %{
19996 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
19997 %}
19998 ins_pipe( pipe_slow );
19999 %}
20000
20001 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20002 predicate(UseAVX > 0);
20003 match(Set dst (SubVB src1 src2));
20004 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20005 ins_encode %{
20006 int vlen_enc = vector_length_encoding(this);
20007 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20008 %}
20009 ins_pipe( pipe_slow );
20010 %}
20011
20012 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20013 predicate((UseAVX > 0) &&
20014 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20015 match(Set dst (SubVB src (LoadVector mem)));
20016 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20017 ins_encode %{
20018 int vlen_enc = vector_length_encoding(this);
20019 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20020 %}
20021 ins_pipe( pipe_slow );
20022 %}
20023
20024 // Shorts/Chars vector sub
20025 instruct vsubS(vec dst, vec src) %{
20026 predicate(UseAVX == 0);
20027 match(Set dst (SubVS dst src));
20028 format %{ "psubw $dst,$src\t! sub packedS" %}
20029 ins_encode %{
20030 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20031 %}
20032 ins_pipe( pipe_slow );
20033 %}
20034
20036 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20037 predicate(UseAVX > 0);
20038 match(Set dst (SubVS src1 src2));
20039 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20040 ins_encode %{
20041 int vlen_enc = vector_length_encoding(this);
20042 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20043 %}
20044 ins_pipe( pipe_slow );
20045 %}
20046
20047 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20048 predicate((UseAVX > 0) &&
20049 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20050 match(Set dst (SubVS src (LoadVector mem)));
20051 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20052 ins_encode %{
20053 int vlen_enc = vector_length_encoding(this);
20054 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20055 %}
20056 ins_pipe( pipe_slow );
20057 %}
20058
20059 // Integers vector sub
20060 instruct vsubI(vec dst, vec src) %{
20061 predicate(UseAVX == 0);
20062 match(Set dst (SubVI dst src));
20063 format %{ "psubd $dst,$src\t! sub packedI" %}
20064 ins_encode %{
20065 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20066 %}
20067 ins_pipe( pipe_slow );
20068 %}
20069
20070 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20071 predicate(UseAVX > 0);
20072 match(Set dst (SubVI src1 src2));
20073 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20074 ins_encode %{
20075 int vlen_enc = vector_length_encoding(this);
20076 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20077 %}
20078 ins_pipe( pipe_slow );
20079 %}
20080
20081 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20082 predicate((UseAVX > 0) &&
20083 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20084 match(Set dst (SubVI src (LoadVector mem)));
20085 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20086 ins_encode %{
20087 int vlen_enc = vector_length_encoding(this);
20088 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20089 %}
20090 ins_pipe( pipe_slow );
20091 %}
20092
20093 // Longs vector sub
20094 instruct vsubL(vec dst, vec src) %{
20095 predicate(UseAVX == 0);
20096 match(Set dst (SubVL dst src));
20097 format %{ "psubq $dst,$src\t! sub packedL" %}
20098 ins_encode %{
20099 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20100 %}
20101 ins_pipe( pipe_slow );
20102 %}
20103
20104 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20105 predicate(UseAVX > 0);
20106 match(Set dst (SubVL src1 src2));
20107 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20108 ins_encode %{
20109 int vlen_enc = vector_length_encoding(this);
20110 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20111 %}
20112 ins_pipe( pipe_slow );
20113 %}
20114
20116 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20117 predicate((UseAVX > 0) &&
20118 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20119 match(Set dst (SubVL src (LoadVector mem)));
20120 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20121 ins_encode %{
20122 int vlen_enc = vector_length_encoding(this);
20123 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20124 %}
20125 ins_pipe( pipe_slow );
20126 %}
20127
20128 // Floats vector sub
20129 instruct vsubF(vec dst, vec src) %{
20130 predicate(UseAVX == 0);
20131 match(Set dst (SubVF dst src));
20132 format %{ "subps $dst,$src\t! sub packedF" %}
20133 ins_encode %{
20134 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20135 %}
20136 ins_pipe( pipe_slow );
20137 %}
20138
20139 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20140 predicate(UseAVX > 0);
20141 match(Set dst (SubVF src1 src2));
20142 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20143 ins_encode %{
20144 int vlen_enc = vector_length_encoding(this);
20145 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20146 %}
20147 ins_pipe( pipe_slow );
20148 %}
20149
20150 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20151 predicate((UseAVX > 0) &&
20152 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20153 match(Set dst (SubVF src (LoadVector mem)));
20154 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20155 ins_encode %{
20156 int vlen_enc = vector_length_encoding(this);
20157 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20158 %}
20159 ins_pipe( pipe_slow );
20160 %}
20161
20162 // Doubles vector sub
20163 instruct vsubD(vec dst, vec src) %{
20164 predicate(UseAVX == 0);
20165 match(Set dst (SubVD dst src));
20166 format %{ "subpd $dst,$src\t! sub packedD" %}
20167 ins_encode %{
20168 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20169 %}
20170 ins_pipe( pipe_slow );
20171 %}
20172
20173 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20174 predicate(UseAVX > 0);
20175 match(Set dst (SubVD src1 src2));
20176 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20177 ins_encode %{
20178 int vlen_enc = vector_length_encoding(this);
20179 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20180 %}
20181 ins_pipe( pipe_slow );
20182 %}
20183
20184 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20185 predicate((UseAVX > 0) &&
20186 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20187 match(Set dst (SubVD src (LoadVector mem)));
20188 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20189 ins_encode %{
20190 int vlen_enc = vector_length_encoding(this);
20191 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20192 %}
20193 ins_pipe( pipe_slow );
20194 %}
20195
20196 // --------------------------------- MUL --------------------------------------
20197
20198 // Byte vector mul
20199 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20200 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20201 match(Set dst (MulVB src1 src2));
20202 effect(TEMP dst, TEMP xtmp);
20203 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20204 ins_encode %{
20205 assert(UseSSE > 3, "required");
20206 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20207 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20208 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20209 __ psllw($dst$$XMMRegister, 8);
20210 __ psrlw($dst$$XMMRegister, 8);
20211 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20212 %}
20213 ins_pipe( pipe_slow );
20214 %}
20215
20216 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20217 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20218 match(Set dst (MulVB src1 src2));
20219 effect(TEMP dst, TEMP xtmp);
20220 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20221 ins_encode %{
20222 assert(UseSSE > 3, "required");
20223 // Odd-index elements
20224 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20225 __ psrlw($dst$$XMMRegister, 8);
20226 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20227 __ psrlw($xtmp$$XMMRegister, 8);
20228 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20229 __ psllw($dst$$XMMRegister, 8);
20230 // Even-index elements
20231 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20232 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20233 __ psllw($xtmp$$XMMRegister, 8);
20234 __ psrlw($xtmp$$XMMRegister, 8);
20235 // Combine
20236 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20237 %}
20238 ins_pipe( pipe_slow );
20239 %}
20240
20241 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20242 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20243 match(Set dst (MulVB src1 src2));
20244 effect(TEMP xtmp1, TEMP xtmp2);
20245 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20246 ins_encode %{
20247 int vlen_enc = vector_length_encoding(this);
20248 // Odd-index elements
20249 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20250 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20251 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20252 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20253 // Even-index elements
20254 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20256 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20257 // Combine
20258 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20259 %}
20260 ins_pipe( pipe_slow );
20261 %}
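
// Note: x86 has no packed byte multiply (there is no pmullb), so the MulVB
// rules above synthesize it from 16-bit multiplies. Illustrative scalar
// sketch of the even/odd-lane trick (not generated code):
//
//   uint16_t mulvb_lane(uint16_t a, uint16_t b) {  // one word = two byte lanes
//     uint16_t odd  = (uint16_t)(((a >> 8) * (b >> 8)) << 8); // odd bytes -> high byte
//     uint16_t even = (uint16_t)((a * b) & 0xFF);             // even bytes -> low byte
//     return (uint16_t)(odd | even);
//   }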
20262
20263 // Shorts/Chars vector mul
20264 instruct vmulS(vec dst, vec src) %{
20265 predicate(UseAVX == 0);
20266 match(Set dst (MulVS dst src));
20267 format %{ "pmullw $dst,$src\t! mul packedS" %}
20268 ins_encode %{
20269 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20270 %}
20271 ins_pipe( pipe_slow );
20272 %}
20273
20274 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20275 predicate(UseAVX > 0);
20276 match(Set dst (MulVS src1 src2));
20277 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20278 ins_encode %{
20279 int vlen_enc = vector_length_encoding(this);
20280 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20281 %}
20282 ins_pipe( pipe_slow );
20283 %}
20284
20285 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20286 predicate((UseAVX > 0) &&
20287 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20288 match(Set dst (MulVS src (LoadVector mem)));
20289 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20290 ins_encode %{
20291 int vlen_enc = vector_length_encoding(this);
20292 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20293 %}
20294 ins_pipe( pipe_slow );
20295 %}
20296
20297 // Integers vector mul
20298 instruct vmulI(vec dst, vec src) %{
20299 predicate(UseAVX == 0);
20300 match(Set dst (MulVI dst src));
20301 format %{ "pmulld $dst,$src\t! mul packedI" %}
20302 ins_encode %{
20303 assert(UseSSE > 3, "required");
20304 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20305 %}
20306 ins_pipe( pipe_slow );
20307 %}
20308
20309 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20310 predicate(UseAVX > 0);
20311 match(Set dst (MulVI src1 src2));
20312 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20313 ins_encode %{
20314 int vlen_enc = vector_length_encoding(this);
20315 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20316 %}
20317 ins_pipe( pipe_slow );
20318 %}
20319
20320 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20321 predicate((UseAVX > 0) &&
20322 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20323 match(Set dst (MulVI src (LoadVector mem)));
20324 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20325 ins_encode %{
20326 int vlen_enc = vector_length_encoding(this);
20327 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20328 %}
20329 ins_pipe( pipe_slow );
20330 %}
20331
20332 // Longs vector mul
20333 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20334 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20335 VM_Version::supports_avx512dq()) ||
20336 VM_Version::supports_avx512vldq());
20337 match(Set dst (MulVL src1 src2));
20338 ins_cost(500);
20339 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20340 ins_encode %{
20341 assert(UseAVX > 2, "required");
20342 int vlen_enc = vector_length_encoding(this);
20343 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20344 %}
20345 ins_pipe( pipe_slow );
20346 %}
20347
20348 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20349 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20350 VM_Version::supports_avx512dq()) ||
20351 (Matcher::vector_length_in_bytes(n) > 8 &&
20352 VM_Version::supports_avx512vldq()));
20353 match(Set dst (MulVL src (LoadVector mem)));
20354 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20355 ins_cost(500);
20356 ins_encode %{
20357 assert(UseAVX > 2, "required");
20358 int vlen_enc = vector_length_encoding(this);
20359 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20360 %}
20361 ins_pipe( pipe_slow );
20362 %}
20363
20364 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20365 predicate(UseAVX == 0);
20366 match(Set dst (MulVL src1 src2));
20367 ins_cost(500);
20368 effect(TEMP dst, TEMP xtmp);
20369 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20370 ins_encode %{
20371 assert(VM_Version::supports_sse4_1(), "required");
20372     // Get the lo-hi cross products; only their lower 32 bits matter
20373 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20374 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20375 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20376 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20377 __ psllq($dst$$XMMRegister, 32);
20378 // Get the lo-lo products
20379 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20380 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20381 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20382 %}
20383 ins_pipe( pipe_slow );
20384 %}
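
// The sequence above comes from splitting each 64-bit lane into 32-bit
// halves: with a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, modulo 2^64
//   a*b == a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
// pshufd/pmulld form the two cross products, paddd sums them, psllq moves
// the sum into the high half, and pmuludq supplies the full lo*lo product.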
20385
20386 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20387 predicate(UseAVX > 0 &&
20388 ((Matcher::vector_length_in_bytes(n) == 64 &&
20389 !VM_Version::supports_avx512dq()) ||
20390 (Matcher::vector_length_in_bytes(n) < 64 &&
20391 !VM_Version::supports_avx512vldq())));
20392 match(Set dst (MulVL src1 src2));
20393 effect(TEMP xtmp1, TEMP xtmp2);
20394 ins_cost(500);
20395 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20396 ins_encode %{
20397 int vlen_enc = vector_length_encoding(this);
20398     // Get the lo-hi cross products; only their lower 32 bits matter
20399 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20400 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20401 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20402 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20403 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20404 // Get the lo-lo products
20405 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20406 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20407 %}
20408 ins_pipe( pipe_slow );
20409 %}
20410
20411 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20412 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20413 match(Set dst (MulVL src1 src2));
20414 ins_cost(100);
20415 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20416 ins_encode %{
20417 int vlen_enc = vector_length_encoding(this);
20418 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20419 %}
20420 ins_pipe( pipe_slow );
20421 %}
20422
20423 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20424 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20425 match(Set dst (MulVL src1 src2));
20426 ins_cost(100);
20427 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20428 ins_encode %{
20429 int vlen_enc = vector_length_encoding(this);
20430 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20431 %}
20432 ins_pipe( pipe_slow );
20433 %}
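
// The two rules above are cheap special cases: as the predicate names
// suggest, when both MulVL inputs are known to be zero-extended
// (has_uint_inputs) or sign-extended (has_int_inputs) 32-bit values, a
// single vpmuludq/vpmuldq already yields the exact 64-bit product of the
// low dwords, hence ins_cost(100) versus the general decomposition.
// Scalar analogy (sketch):
//   (uint64_t)(uint32_t)a * (uint32_t)b   // vpmuludq
//   (int64_t)(int32_t)a * (int32_t)b      // vpmuldq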
20434
20435 // Floats vector mul
20436 instruct vmulF(vec dst, vec src) %{
20437 predicate(UseAVX == 0);
20438 match(Set dst (MulVF dst src));
20439 format %{ "mulps $dst,$src\t! mul packedF" %}
20440 ins_encode %{
20441 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20442 %}
20443 ins_pipe( pipe_slow );
20444 %}
20445
20446 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20447 predicate(UseAVX > 0);
20448 match(Set dst (MulVF src1 src2));
20449 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20450 ins_encode %{
20451 int vlen_enc = vector_length_encoding(this);
20452 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20453 %}
20454 ins_pipe( pipe_slow );
20455 %}
20456
20457 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20458 predicate((UseAVX > 0) &&
20459 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20460 match(Set dst (MulVF src (LoadVector mem)));
20461 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20462 ins_encode %{
20463 int vlen_enc = vector_length_encoding(this);
20464 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20465 %}
20466 ins_pipe( pipe_slow );
20467 %}
20468
20469 // Doubles vector mul
20470 instruct vmulD(vec dst, vec src) %{
20471 predicate(UseAVX == 0);
20472 match(Set dst (MulVD dst src));
20473 format %{ "mulpd $dst,$src\t! mul packedD" %}
20474 ins_encode %{
20475 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20476 %}
20477 ins_pipe( pipe_slow );
20478 %}
20479
20480 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20481 predicate(UseAVX > 0);
20482 match(Set dst (MulVD src1 src2));
20483 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20484 ins_encode %{
20485 int vlen_enc = vector_length_encoding(this);
20486 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20487 %}
20488 ins_pipe( pipe_slow );
20489 %}
20490
20491 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20492 predicate((UseAVX > 0) &&
20493 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20494 match(Set dst (MulVD src (LoadVector mem)));
20495 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20496 ins_encode %{
20497 int vlen_enc = vector_length_encoding(this);
20498 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20499 %}
20500 ins_pipe( pipe_slow );
20501 %}
20502
20503 // --------------------------------- DIV --------------------------------------
20504
20505 // Floats vector div
20506 instruct vdivF(vec dst, vec src) %{
20507 predicate(UseAVX == 0);
20508 match(Set dst (DivVF dst src));
20509 format %{ "divps $dst,$src\t! div packedF" %}
20510 ins_encode %{
20511 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20512 %}
20513 ins_pipe( pipe_slow );
20514 %}
20515
20516 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20517 predicate(UseAVX > 0);
20518 match(Set dst (DivVF src1 src2));
20519 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20520 ins_encode %{
20521 int vlen_enc = vector_length_encoding(this);
20522 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20523 %}
20524 ins_pipe( pipe_slow );
20525 %}
20526
20527 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20528 predicate((UseAVX > 0) &&
20529 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20530 match(Set dst (DivVF src (LoadVector mem)));
20531 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20532 ins_encode %{
20533 int vlen_enc = vector_length_encoding(this);
20534 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20535 %}
20536 ins_pipe( pipe_slow );
20537 %}
20538
20539 // Doubles vector div
20540 instruct vdivD(vec dst, vec src) %{
20541 predicate(UseAVX == 0);
20542 match(Set dst (DivVD dst src));
20543 format %{ "divpd $dst,$src\t! div packedD" %}
20544 ins_encode %{
20545 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20546 %}
20547 ins_pipe( pipe_slow );
20548 %}
20549
20550 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20551 predicate(UseAVX > 0);
20552 match(Set dst (DivVD src1 src2));
20553 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20554 ins_encode %{
20555 int vlen_enc = vector_length_encoding(this);
20556 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20557 %}
20558 ins_pipe( pipe_slow );
20559 %}
20560
20561 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20562 predicate((UseAVX > 0) &&
20563 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20564 match(Set dst (DivVD src (LoadVector mem)));
20565 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20566 ins_encode %{
20567 int vlen_enc = vector_length_encoding(this);
20568 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20569 %}
20570 ins_pipe( pipe_slow );
20571 %}
20572
20573 // ------------------------------ MinMax ---------------------------------------
20574
20575 // Byte, Short, Int vector Min/Max
20576 instruct minmax_reg_sse(vec dst, vec src) %{
20577 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20578 UseAVX == 0);
20579 match(Set dst (MinV dst src));
20580 match(Set dst (MaxV dst src));
20581 format %{ "vector_minmax $dst,$src\t! " %}
20582 ins_encode %{
20583 assert(UseSSE >= 4, "required");
20584
20585 int opcode = this->ideal_Opcode();
20586 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20587 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20588 %}
20589 ins_pipe( pipe_slow );
20590 %}
20591
20592 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20593 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20594 UseAVX > 0);
20595 match(Set dst (MinV src1 src2));
20596 match(Set dst (MaxV src1 src2));
20597 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20598 ins_encode %{
20599 int opcode = this->ideal_Opcode();
20600 int vlen_enc = vector_length_encoding(this);
20601 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20602
20603 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20604 %}
20605 ins_pipe( pipe_slow );
20606 %}
20607
20608 // Long vector Min/Max
20609 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20610 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20611 UseAVX == 0);
20612 match(Set dst (MinV dst src));
20613 match(Set dst (MaxV src dst));
20614 effect(TEMP dst, TEMP tmp);
20615 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20616 ins_encode %{
20617 assert(UseSSE >= 4, "required");
20618
20619 int opcode = this->ideal_Opcode();
20620 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20621 assert(elem_bt == T_LONG, "sanity");
20622
20623 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20624 %}
20625 ins_pipe( pipe_slow );
20626 %}
20627
20628 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20629 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20630 UseAVX > 0 && !VM_Version::supports_avx512vl());
20631 match(Set dst (MinV src1 src2));
20632 match(Set dst (MaxV src1 src2));
20633 effect(TEMP dst);
20634 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20635 ins_encode %{
20636 int vlen_enc = vector_length_encoding(this);
20637 int opcode = this->ideal_Opcode();
20638 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20639 assert(elem_bt == T_LONG, "sanity");
20640
20641 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20642 %}
20643 ins_pipe( pipe_slow );
20644 %}
20645
20646 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20647 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20648 Matcher::vector_element_basic_type(n) == T_LONG);
20649 match(Set dst (MinV src1 src2));
20650 match(Set dst (MaxV src1 src2));
20651   format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20652 ins_encode %{
20653 assert(UseAVX > 2, "required");
20654
20655 int vlen_enc = vector_length_encoding(this);
20656 int opcode = this->ideal_Opcode();
20657 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20658 assert(elem_bt == T_LONG, "sanity");
20659
20660 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20661 %}
20662 ins_pipe( pipe_slow );
20663 %}
20664
20665 // Float/Double vector Min/Max
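// Java Math.min/max semantics differ from plain vminps/vmaxps, which simply
// return the second operand on a NaN input or a +/-0.0 tie: Java requires
// e.g. Math.min(-0.0f, +0.0f) == -0.0f and Math.min(x, NaN) to be NaN.
// The AVX10.2 VMINMAX* forms provide such IEEE 754-2019 semantics directly;
// the older rules below blend through temporaries instead.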
20666 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20667 predicate(VM_Version::supports_avx10_2() &&
20668 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20669 match(Set dst (MinV a b));
20670 match(Set dst (MaxV a b));
20671 format %{ "vector_minmaxFP $dst, $a, $b" %}
20672 ins_encode %{
20673 int vlen_enc = vector_length_encoding(this);
20674 int opcode = this->ideal_Opcode();
20675 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20676 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20677 %}
20678 ins_pipe( pipe_slow );
20679 %}
20680
20681 // Float/Double vector Min/Max
20682 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20683 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20684 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20685 UseAVX > 0);
20686 match(Set dst (MinV a b));
20687 match(Set dst (MaxV a b));
20688 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20689 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20690 ins_encode %{
20691 assert(UseAVX > 0, "required");
20692
20693 int opcode = this->ideal_Opcode();
20694 int vlen_enc = vector_length_encoding(this);
20695 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20696
20697 __ vminmax_fp(opcode, elem_bt,
20698 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20699                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20700 %}
20701 ins_pipe( pipe_slow );
20702 %}
20703
20704 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20705 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20706 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20707 match(Set dst (MinV a b));
20708 match(Set dst (MaxV a b));
20709 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20710 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20711 ins_encode %{
20712 assert(UseAVX > 2, "required");
20713
20714 int opcode = this->ideal_Opcode();
20715 int vlen_enc = vector_length_encoding(this);
20716 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20717
20718 __ evminmax_fp(opcode, elem_bt,
20719 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20720                  $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20721 %}
20722 ins_pipe( pipe_slow );
20723 %}
20724
20725 // ------------------------------ Unsigned vector Min/Max ----------------------
20726
20727 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20728 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20729 match(Set dst (UMinV a b));
20730 match(Set dst (UMaxV a b));
20731 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20732 ins_encode %{
20733 int opcode = this->ideal_Opcode();
20734 int vlen_enc = vector_length_encoding(this);
20735 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20736 assert(is_integral_type(elem_bt), "");
20737 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20738 %}
20739 ins_pipe( pipe_slow );
20740 %}
20741
20742 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20743 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20744 match(Set dst (UMinV a (LoadVector b)));
20745 match(Set dst (UMaxV a (LoadVector b)));
20746 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20747 ins_encode %{
20748 int opcode = this->ideal_Opcode();
20749 int vlen_enc = vector_length_encoding(this);
20750 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20751 assert(is_integral_type(elem_bt), "");
20752 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20753 %}
20754 ins_pipe( pipe_slow );
20755 %}
20756
20757 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20758 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20759 match(Set dst (UMinV a b));
20760 match(Set dst (UMaxV a b));
20761 effect(TEMP xtmp1, TEMP xtmp2);
20762   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20763 ins_encode %{
20764 int opcode = this->ideal_Opcode();
20765 int vlen_enc = vector_length_encoding(this);
20766 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20767 %}
20768 ins_pipe( pipe_slow );
20769 %}
20770
20771 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20772 match(Set dst (UMinV (Binary dst src2) mask));
20773 match(Set dst (UMaxV (Binary dst src2) mask));
20774 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20775 ins_encode %{
20776 int vlen_enc = vector_length_encoding(this);
20777 BasicType bt = Matcher::vector_element_basic_type(this);
20778 int opc = this->ideal_Opcode();
20779 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20780 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20781 %}
20782 ins_pipe( pipe_slow );
20783 %}
20784
20785 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20786 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20787 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20788 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20789 ins_encode %{
20790 int vlen_enc = vector_length_encoding(this);
20791 BasicType bt = Matcher::vector_element_basic_type(this);
20792 int opc = this->ideal_Opcode();
20793 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20794 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20795 %}
20796 ins_pipe( pipe_slow );
20797 %}
20798
20799 // --------------------------------- Signum/CopySign ---------------------------
20800
20801 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20802 match(Set dst (SignumF dst (Binary zero one)));
20803 effect(KILL cr);
20804 format %{ "signumF $dst, $dst" %}
20805 ins_encode %{
20806 int opcode = this->ideal_Opcode();
20807 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20808 %}
20809 ins_pipe( pipe_slow );
20810 %}
20811
20812 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20813 match(Set dst (SignumD dst (Binary zero one)));
20814 effect(KILL cr);
20815 format %{ "signumD $dst, $dst" %}
20816 ins_encode %{
20817 int opcode = this->ideal_Opcode();
20818 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20819 %}
20820 ins_pipe( pipe_slow );
20821 %}
20822
20823 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20824 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20825 match(Set dst (SignumVF src (Binary zero one)));
20826 match(Set dst (SignumVD src (Binary zero one)));
20827 effect(TEMP dst, TEMP xtmp1);
20828 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20829 ins_encode %{
20830 int opcode = this->ideal_Opcode();
20831 int vec_enc = vector_length_encoding(this);
20832 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20833 $xtmp1$$XMMRegister, vec_enc);
20834 %}
20835 ins_pipe( pipe_slow );
20836 %}
20837
20838 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20839 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20840 match(Set dst (SignumVF src (Binary zero one)));
20841 match(Set dst (SignumVD src (Binary zero one)));
20842 effect(TEMP dst, TEMP ktmp1);
20843 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20844 ins_encode %{
20845 int opcode = this->ideal_Opcode();
20846 int vec_enc = vector_length_encoding(this);
20847 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20848 $ktmp1$$KRegister, vec_enc);
20849 %}
20850 ins_pipe( pipe_slow );
20851 %}
20852
20853 // ---------------------------------------
20854 // For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
20855 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20856 // C (xmm2) is set to 0x7FFFFFFF
20857 // Wherever xmm2 is 0, we want the bit from B (the sign operand)
20858 // Wherever xmm2 is 1, we want the bit from A (the magnitude operand)
20859 //
20860 // A B C Result
20861 // 0 0 0 0
20862 // 0 0 1 0
20863 // 0 1 0 1
20864 // 0 1 1 0
20865 // 1 0 0 0
20866 // 1 0 1 1
20867 // 1 1 0 1
20868 // 1 1 1 1
20869 //
20870 // Result going from high bit to low bit is 0b11100100 = 0xE4
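//
// Scalar sketch of the same selection (illustrative only):
//   uint32_t copy_sign_bits(uint32_t a, uint32_t b) { // a = xmm0 (magnitude), b = xmm1 (sign)
//     uint32_t c = 0x7FFFFFFF;                        // the vpternlog C operand
//     return (a & c) | (b & ~c);                      // payload from a, sign bit from b
//   }
// Each result bit i of vpternlog is imm8 bit ((A_i << 2) | (B_i << 1) | C_i);
// evaluating that for imm8 = 0xE4 reproduces the table above.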
20871 // ---------------------------------------
20872
20873 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20874 match(Set dst (CopySignF dst src));
20875 effect(TEMP tmp1, TEMP tmp2);
20876 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20877 ins_encode %{
20878 __ movl($tmp2$$Register, 0x7FFFFFFF);
20879 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20880 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20881 %}
20882 ins_pipe( pipe_slow );
20883 %}
20884
20885 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20886 match(Set dst (CopySignD dst (Binary src zero)));
20887 ins_cost(100);
20888 effect(TEMP tmp1, TEMP tmp2);
20889 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20890 ins_encode %{
20891 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20892 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20893 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20894 %}
20895 ins_pipe( pipe_slow );
20896 %}
20897
20898 //----------------------------- CompressBits/ExpandBits ------------------------
20899
20900 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20901 predicate(n->bottom_type()->isa_int());
20902 match(Set dst (CompressBits src mask));
20903 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20904 ins_encode %{
20905 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20906 %}
20907 ins_pipe( pipe_slow );
20908 %}
20909
20910 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20911 predicate(n->bottom_type()->isa_int());
20912 match(Set dst (ExpandBits src mask));
20913 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20914 ins_encode %{
20915 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20916 %}
20917 ins_pipe( pipe_slow );
20918 %}
20919
20920 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20921 predicate(n->bottom_type()->isa_int());
20922 match(Set dst (CompressBits src (LoadI mask)));
20923 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20924 ins_encode %{
20925 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20926 %}
20927 ins_pipe( pipe_slow );
20928 %}
20929
20930 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20931 predicate(n->bottom_type()->isa_int());
20932 match(Set dst (ExpandBits src (LoadI mask)));
20933 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20934 ins_encode %{
20935 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20936 %}
20937 ins_pipe( pipe_slow );
20938 %}
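
// BMI2 pext/pdep semantics as a portable scalar sketch (illustrative only):
//   uint32_t pext32(uint32_t src, uint32_t mask) {    // CompressBits
//     uint32_t dst = 0;
//     for (int i = 0, j = 0; i < 32; i++) {
//       if (mask & (1u << i)) {
//         dst |= ((src >> i) & 1u) << j++;            // gather masked bits densely
//       }
//     }
//     return dst;
//   }
// pdep (ExpandBits) is the inverse: it scatters the low-order bits of src
// into the bit positions selected by mask.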
20939
20940 // --------------------------------- Sqrt --------------------------------------
20941
20942 instruct vsqrtF_reg(vec dst, vec src) %{
20943 match(Set dst (SqrtVF src));
20944 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
20945 ins_encode %{
20946 assert(UseAVX > 0, "required");
20947 int vlen_enc = vector_length_encoding(this);
20948 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20949 %}
20950 ins_pipe( pipe_slow );
20951 %}
20952
20953 instruct vsqrtF_mem(vec dst, memory mem) %{
20954 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20955 match(Set dst (SqrtVF (LoadVector mem)));
20956 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
20957 ins_encode %{
20958 assert(UseAVX > 0, "required");
20959 int vlen_enc = vector_length_encoding(this);
20960 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20961 %}
20962 ins_pipe( pipe_slow );
20963 %}
20964
20965 // Floating point vector sqrt
20966 instruct vsqrtD_reg(vec dst, vec src) %{
20967 match(Set dst (SqrtVD src));
20968 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
20969 ins_encode %{
20970 assert(UseAVX > 0, "required");
20971 int vlen_enc = vector_length_encoding(this);
20972 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20973 %}
20974 ins_pipe( pipe_slow );
20975 %}
20976
20977 instruct vsqrtD_mem(vec dst, memory mem) %{
20978 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20979 match(Set dst (SqrtVD (LoadVector mem)));
20980 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
20981 ins_encode %{
20982 assert(UseAVX > 0, "required");
20983 int vlen_enc = vector_length_encoding(this);
20984 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20985 %}
20986 ins_pipe( pipe_slow );
20987 %}
20988
20989 // ------------------------------ Shift ---------------------------------------
20990
20991 // Left and right shift count vectors are the same on x86
20992 // (only lowest bits of xmm reg are used for count).
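// (movdl zero-extends the 32-bit count into the low dword; the xmm-count
// shift forms read the entire low 64-bit quadword as the count, so one
// loaded count serves every element size and both shift directions.)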
20993 instruct vshiftcnt(vec dst, rRegI cnt) %{
20994 match(Set dst (LShiftCntV cnt));
20995 match(Set dst (RShiftCntV cnt));
20996 format %{ "movdl $dst,$cnt\t! load shift count" %}
20997 ins_encode %{
20998 __ movdl($dst$$XMMRegister, $cnt$$Register);
20999 %}
21000 ins_pipe( pipe_slow );
21001 %}
21002
21003 // Byte vector shift
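// x86 has no per-byte shift instructions, so byte shifts are synthesized by
// widening each byte to a 16-bit lane (sign- or zero-extended, see
// vextendbw), shifting words, masking back to the low byte and re-packing.
// Per byte, roughly (sketch for the signed right shift):
//   res = (uint8_t)(((int16_t)(int8_t)b >> s) & 0xFF)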
21004 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21005 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21006 match(Set dst ( LShiftVB src shift));
21007 match(Set dst ( RShiftVB src shift));
21008 match(Set dst (URShiftVB src shift));
21009 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21010   format %{ "vector_byte_shift $dst,$src,$shift" %}
21011 ins_encode %{
21012 assert(UseSSE > 3, "required");
21013 int opcode = this->ideal_Opcode();
21014 bool sign = (opcode != Op_URShiftVB);
21015 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21016 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21017 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21018 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21019 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21020 %}
21021 ins_pipe( pipe_slow );
21022 %}
21023
21024 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21025 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21026 UseAVX <= 1);
21027 match(Set dst ( LShiftVB src shift));
21028 match(Set dst ( RShiftVB src shift));
21029 match(Set dst (URShiftVB src shift));
21030 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21031   format %{ "vector_byte_shift $dst,$src,$shift" %}
21032 ins_encode %{
21033 assert(UseSSE > 3, "required");
21034 int opcode = this->ideal_Opcode();
21035 bool sign = (opcode != Op_URShiftVB);
21036 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21037 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21038 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21039 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21040 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21041 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21042 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21043 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21044 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21045 %}
21046 ins_pipe( pipe_slow );
21047 %}
21048
21049 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21050 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21051 UseAVX > 1);
21052 match(Set dst ( LShiftVB src shift));
21053 match(Set dst ( RShiftVB src shift));
21054 match(Set dst (URShiftVB src shift));
21055 effect(TEMP dst, TEMP tmp);
21056   format %{ "vector_byte_shift $dst,$src,$shift" %}
21057 ins_encode %{
21058 int opcode = this->ideal_Opcode();
21059 bool sign = (opcode != Op_URShiftVB);
21060 int vlen_enc = Assembler::AVX_256bit;
21061 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21062 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21063 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21064 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21065 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21066 %}
21067 ins_pipe( pipe_slow );
21068 %}
21069
21070 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21071 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21072 match(Set dst ( LShiftVB src shift));
21073 match(Set dst ( RShiftVB src shift));
21074 match(Set dst (URShiftVB src shift));
21075 effect(TEMP dst, TEMP tmp);
21076   format %{ "vector_byte_shift $dst,$src,$shift" %}
21077 ins_encode %{
21078 assert(UseAVX > 1, "required");
21079 int opcode = this->ideal_Opcode();
21080 bool sign = (opcode != Op_URShiftVB);
21081 int vlen_enc = Assembler::AVX_256bit;
21082 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21083 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21084 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21085 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21086 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21087 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21088 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21089 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21090 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21091 %}
21092 ins_pipe( pipe_slow );
21093 %}
21094
21095 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21096 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21097 match(Set dst ( LShiftVB src shift));
21098   match(Set dst ( RShiftVB src shift));
21099 match(Set dst (URShiftVB src shift));
21100 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21101   format %{ "vector_byte_shift $dst,$src,$shift" %}
21102 ins_encode %{
21103 assert(UseAVX > 2, "required");
21104 int opcode = this->ideal_Opcode();
21105 bool sign = (opcode != Op_URShiftVB);
21106 int vlen_enc = Assembler::AVX_512bit;
21107 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21108 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21109 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21110 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21111 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21112 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21113 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21114 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21115 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21116 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21117 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21118 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21119 %}
21120 ins_pipe( pipe_slow );
21121 %}
21122
21123 // A vector logical right shift of shorts would produce an incorrect Java
21124 // result for negative data, because Java code converts a short value to an
21125 // int with sign extension before shifting. Char vectors are fine, since
21126 // chars are unsigned values.
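// Worked example: for short s = (short)-1, Java evaluates s >>> 2 on the
// sign-extended int, 0xFFFFFFFF >>> 2 == 0x3FFFFFFF, whose low 16 bits are
// 0xFFFF, while a per-lane 16-bit logical shift would give 0x3FFF.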
21127 // Shorts/Chars vector left shift
21128 instruct vshiftS(vec dst, vec src, vec shift) %{
21129 predicate(!n->as_ShiftV()->is_var_shift());
21130 match(Set dst ( LShiftVS src shift));
21131 match(Set dst ( RShiftVS src shift));
21132 match(Set dst (URShiftVS src shift));
21133 effect(TEMP dst, USE src, USE shift);
21134 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21135 ins_encode %{
21136 int opcode = this->ideal_Opcode();
21137 if (UseAVX > 0) {
21138 int vlen_enc = vector_length_encoding(this);
21139 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21140 } else {
21141 int vlen = Matcher::vector_length(this);
21142 if (vlen == 2) {
21143 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21144 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21145 } else if (vlen == 4) {
21146 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21147 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21148 } else {
21149 assert (vlen == 8, "sanity");
21150 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21151 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21152 }
21153 }
21154 %}
21155 ins_pipe( pipe_slow );
21156 %}
21157
21158 // Integers vector left shift
21159 instruct vshiftI(vec dst, vec src, vec shift) %{
21160 predicate(!n->as_ShiftV()->is_var_shift());
21161 match(Set dst ( LShiftVI src shift));
21162 match(Set dst ( RShiftVI src shift));
21163 match(Set dst (URShiftVI src shift));
21164 effect(TEMP dst, USE src, USE shift);
21165 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21166 ins_encode %{
21167 int opcode = this->ideal_Opcode();
21168 if (UseAVX > 0) {
21169 int vlen_enc = vector_length_encoding(this);
21170 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21171 } else {
21172 int vlen = Matcher::vector_length(this);
21173 if (vlen == 2) {
21174 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21175 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21176 } else {
21177 assert(vlen == 4, "sanity");
21178 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21179 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21180 }
21181 }
21182 %}
21183 ins_pipe( pipe_slow );
21184 %}
21185
21186 // Integers vector left constant shift
21187 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21188 match(Set dst (LShiftVI src (LShiftCntV shift)));
21189 match(Set dst (RShiftVI src (RShiftCntV shift)));
21190 match(Set dst (URShiftVI src (RShiftCntV shift)));
21191 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21192 ins_encode %{
21193 int opcode = this->ideal_Opcode();
21194 if (UseAVX > 0) {
21195 int vector_len = vector_length_encoding(this);
21196 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21197 } else {
21198 int vlen = Matcher::vector_length(this);
21199 if (vlen == 2) {
21200 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21201 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21202 } else {
21203 assert(vlen == 4, "sanity");
21204 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21205 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21206 }
21207 }
21208 %}
21209 ins_pipe( pipe_slow );
21210 %}
21211
21212 // Longs vector shift
21213 instruct vshiftL(vec dst, vec src, vec shift) %{
21214 predicate(!n->as_ShiftV()->is_var_shift());
21215 match(Set dst ( LShiftVL src shift));
21216 match(Set dst (URShiftVL src shift));
21217 effect(TEMP dst, USE src, USE shift);
21218 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21219 ins_encode %{
21220 int opcode = this->ideal_Opcode();
21221 if (UseAVX > 0) {
21222 int vlen_enc = vector_length_encoding(this);
21223 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21224 } else {
21225 assert(Matcher::vector_length(this) == 2, "");
21226 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21227 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21228 }
21229 %}
21230 ins_pipe( pipe_slow );
21231 %}
21232
21233 // Longs vector constant shift
21234 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21235 match(Set dst (LShiftVL src (LShiftCntV shift)));
21236 match(Set dst (URShiftVL src (RShiftCntV shift)));
21237 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21238 ins_encode %{
21239 int opcode = this->ideal_Opcode();
21240 if (UseAVX > 0) {
21241 int vector_len = vector_length_encoding(this);
21242 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21243 } else {
21244 assert(Matcher::vector_length(this) == 2, "");
21245 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21246 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21247 }
21248 %}
21249 ins_pipe( pipe_slow );
21250 %}
21251
21252 // -------------------ArithmeticRightShift -----------------------------------
21253 // Long vector arithmetic right shift
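// Before AVX-512 (evpsraq) there is no packed 64-bit arithmetic right
// shift, so it is emulated with the standard identity, for 0 <= s < 64:
//   sra(x, s) == (srl(x, s) ^ m) - m,  where m = srl(0x8000000000000000, s)
// The xor/subtract of the shifted sign-bit mask (vector_long_sign_mask is
// assumed to hold 0x8000000000000000 per lane) sign-extends the logically
// shifted value, which is exactly the psrlq/pxor/psubq sequence below.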
21254 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21255 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21256 match(Set dst (RShiftVL src shift));
21257 effect(TEMP dst, TEMP tmp);
21258 format %{ "vshiftq $dst,$src,$shift" %}
21259 ins_encode %{
21260 uint vlen = Matcher::vector_length(this);
21261 if (vlen == 2) {
21262 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21263 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21264 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21265 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21266 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21267 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21268 } else {
21269 assert(vlen == 4, "sanity");
21270 assert(UseAVX > 1, "required");
21271 int vlen_enc = Assembler::AVX_256bit;
21272 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21273 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21274 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21275 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21276 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21277 }
21278 %}
21279 ins_pipe( pipe_slow );
21280 %}
21281
21282 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21283 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21284 match(Set dst (RShiftVL src shift));
21285 format %{ "vshiftq $dst,$src,$shift" %}
21286 ins_encode %{
21287 int vlen_enc = vector_length_encoding(this);
21288 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21289 %}
21290 ins_pipe( pipe_slow );
21291 %}
21292
21293 // ------------------- Variable Shift -----------------------------
21294 // Byte variable shift
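// x86 has no packed byte shift instructions, so bytes are widened to words, shifted
// with the word-sized helper (varshiftbw), and the low bytes packed back (vpackuswb).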
21295 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21296 predicate(Matcher::vector_length(n) <= 8 &&
21297 n->as_ShiftV()->is_var_shift() &&
21298 !VM_Version::supports_avx512bw());
21299 match(Set dst ( LShiftVB src shift));
21300 match(Set dst ( RShiftVB src shift));
21301 match(Set dst (URShiftVB src shift));
21302 effect(TEMP dst, TEMP vtmp);
21303 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21304 ins_encode %{
21305 assert(UseAVX >= 2, "required");
21306
21307 int opcode = this->ideal_Opcode();
21308 int vlen_enc = Assembler::AVX_128bit;
21309 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21310 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21311 %}
21312 ins_pipe( pipe_slow );
21313 %}
21314
21315 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21316 predicate(Matcher::vector_length(n) == 16 &&
21317 n->as_ShiftV()->is_var_shift() &&
21318 !VM_Version::supports_avx512bw());
21319 match(Set dst ( LShiftVB src shift));
21320 match(Set dst ( RShiftVB src shift));
21321 match(Set dst (URShiftVB src shift));
21322 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21323 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21324 ins_encode %{
21325 assert(UseAVX >= 2, "required");
21326
21327 int opcode = this->ideal_Opcode();
21328 int vlen_enc = Assembler::AVX_128bit;
21329 // Shift lower half and get word result in dst
21330 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21331
21332 // Shift upper half and get word result in vtmp1
21333 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21334 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21335 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21336
21337 // Merge and down convert the two word results to byte in dst
21338 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21339 %}
21340 ins_pipe( pipe_slow );
21341 %}
21342
21343 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21344 predicate(Matcher::vector_length(n) == 32 &&
21345 n->as_ShiftV()->is_var_shift() &&
21346 !VM_Version::supports_avx512bw());
21347 match(Set dst ( LShiftVB src shift));
21348 match(Set dst ( RShiftVB src shift));
21349 match(Set dst (URShiftVB src shift));
21350 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21352 ins_encode %{
21353 assert(UseAVX >= 2, "required");
21354
21355 int opcode = this->ideal_Opcode();
21356 int vlen_enc = Assembler::AVX_128bit;
21357 // Process lower 128 bits and get result in dst
21358 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21359 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21360 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21361 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21362 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21363
21364 // Process higher 128 bits and get result in vtmp3
21365 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21366 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21367 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21368 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21369 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21370 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21371 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21372
21373 // Merge the two results in dst
21374 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21375 %}
21376 ins_pipe( pipe_slow );
21377 %}
21378
21379 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21380 predicate(Matcher::vector_length(n) <= 32 &&
21381 n->as_ShiftV()->is_var_shift() &&
21382 VM_Version::supports_avx512bw());
21383 match(Set dst ( LShiftVB src shift));
21384 match(Set dst ( RShiftVB src shift));
21385 match(Set dst (URShiftVB src shift));
21386 effect(TEMP dst, TEMP vtmp);
21387 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21388 ins_encode %{
21389 assert(UseAVX > 2, "required");
21390
21391 int opcode = this->ideal_Opcode();
21392 int vlen_enc = vector_length_encoding(this);
21393 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21394 %}
21395 ins_pipe( pipe_slow );
21396 %}
21397
21398 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21399 predicate(Matcher::vector_length(n) == 64 &&
21400 n->as_ShiftV()->is_var_shift() &&
21401 VM_Version::supports_avx512bw());
21402 match(Set dst ( LShiftVB src shift));
21403 match(Set dst ( RShiftVB src shift));
21404 match(Set dst (URShiftVB src shift));
21405 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21406 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21407 ins_encode %{
21408 assert(UseAVX > 2, "required");
21409
21410 int opcode = this->ideal_Opcode();
21411 int vlen_enc = Assembler::AVX_256bit;
21412 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21413 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21414 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21415 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21416 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21417 %}
21418 ins_pipe( pipe_slow );
21419 %}
21420
21421 // Short variable shift
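// Without AVX512BW there are no per-element word shifts, so shorts are widened to
// dwords, shifted with varshiftd, masked back to 16 bits, and re-packed (vpackusdw).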
21422 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21423 predicate(Matcher::vector_length(n) <= 8 &&
21424 n->as_ShiftV()->is_var_shift() &&
21425 !VM_Version::supports_avx512bw());
21426 match(Set dst ( LShiftVS src shift));
21427 match(Set dst ( RShiftVS src shift));
21428 match(Set dst (URShiftVS src shift));
21429 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21431 ins_encode %{
21432 assert(UseAVX >= 2, "required");
21433
21434 int opcode = this->ideal_Opcode();
21435 bool sign = (opcode != Op_URShiftVS);
21436 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21439 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21440 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21441 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21442 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21443 %}
21444 ins_pipe( pipe_slow );
21445 %}
21446
21447 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21448 predicate(Matcher::vector_length(n) == 16 &&
21449 n->as_ShiftV()->is_var_shift() &&
21450 !VM_Version::supports_avx512bw());
21451 match(Set dst ( LShiftVS src shift));
21452 match(Set dst ( RShiftVS src shift));
21453 match(Set dst (URShiftVS src shift));
21454 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21456 ins_encode %{
21457 assert(UseAVX >= 2, "required");
21458
21459 int opcode = this->ideal_Opcode();
21460 bool sign = (opcode != Op_URShiftVS);
21461 int vlen_enc = Assembler::AVX_256bit;
21462 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21463 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21464 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21465 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21466 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21467
21468 // Shift upper half, with result in dst using vtmp1 as TEMP
21469 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21470 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21471 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21472 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21473 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21474 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21475
21476 // Merge lower and upper half result into dst
21477 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21478 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21479 %}
21480 ins_pipe( pipe_slow );
21481 %}
21482
21483 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21484 predicate(n->as_ShiftV()->is_var_shift() &&
21485 VM_Version::supports_avx512bw());
21486 match(Set dst ( LShiftVS src shift));
21487 match(Set dst ( RShiftVS src shift));
21488 match(Set dst (URShiftVS src shift));
21489 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21490 ins_encode %{
21491 assert(UseAVX > 2, "required");
21492
21493 int opcode = this->ideal_Opcode();
21494 int vlen_enc = vector_length_encoding(this);
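    // EVEX word shifts at 128/256-bit width require AVX512VL; without it,
    // fall back to the full 512-bit width.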
21495 if (!VM_Version::supports_avx512vl()) {
21496 vlen_enc = Assembler::AVX_512bit;
21497 }
21498 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21499 %}
21500 ins_pipe( pipe_slow );
21501 %}
21502
// Integer variable shift
21504 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21505 predicate(n->as_ShiftV()->is_var_shift());
21506 match(Set dst ( LShiftVI src shift));
21507 match(Set dst ( RShiftVI src shift));
21508 match(Set dst (URShiftVI src shift));
21509 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21510 ins_encode %{
21511 assert(UseAVX >= 2, "required");
21512
21513 int opcode = this->ideal_Opcode();
21514 int vlen_enc = vector_length_encoding(this);
21515 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21516 %}
21517 ins_pipe( pipe_slow );
21518 %}
21519
// Long variable shift
21521 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21522 predicate(n->as_ShiftV()->is_var_shift());
21523 match(Set dst ( LShiftVL src shift));
21524 match(Set dst (URShiftVL src shift));
21525 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21526 ins_encode %{
21527 assert(UseAVX >= 2, "required");
21528
21529 int opcode = this->ideal_Opcode();
21530 int vlen_enc = vector_length_encoding(this);
21531 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21532 %}
21533 ins_pipe( pipe_slow );
21534 %}
21535
// Long variable arithmetic right shift
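// vpsravq (variable arithmetic shift of 64-bit lanes) is AVX-512 only; on AVX2 the
// macro assembler emulates it, which is why this form needs the extra $vtmp TEMP.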
21537 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21538 predicate(Matcher::vector_length(n) <= 4 &&
21539 n->as_ShiftV()->is_var_shift() &&
21540 UseAVX == 2);
21541 match(Set dst (RShiftVL src shift));
21542 effect(TEMP dst, TEMP vtmp);
21543 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21544 ins_encode %{
21545 int opcode = this->ideal_Opcode();
21546 int vlen_enc = vector_length_encoding(this);
21547 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21548 $vtmp$$XMMRegister);
21549 %}
21550 ins_pipe( pipe_slow );
21551 %}
21552
21553 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21554 predicate(n->as_ShiftV()->is_var_shift() &&
21555 UseAVX > 2);
21556 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21558 ins_encode %{
21559 int opcode = this->ideal_Opcode();
21560 int vlen_enc = vector_length_encoding(this);
21561 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562 %}
21563 ins_pipe( pipe_slow );
21564 %}
21565
21566 // --------------------------------- AND --------------------------------------
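// UseAVX == 0 selects the destructive two-operand SSE forms (dst = dst op src); with
// AVX the non-destructive three-operand VEX forms are used instead. The same split
// applies to the OR and XOR sections below.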
21567
21568 instruct vand(vec dst, vec src) %{
21569 predicate(UseAVX == 0);
21570 match(Set dst (AndV dst src));
21571 format %{ "pand $dst,$src\t! and vectors" %}
21572 ins_encode %{
21573 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21574 %}
21575 ins_pipe( pipe_slow );
21576 %}
21577
21578 instruct vand_reg(vec dst, vec src1, vec src2) %{
21579 predicate(UseAVX > 0);
21580 match(Set dst (AndV src1 src2));
21581 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21582 ins_encode %{
21583 int vlen_enc = vector_length_encoding(this);
21584 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21585 %}
21586 ins_pipe( pipe_slow );
21587 %}
21588
21589 instruct vand_mem(vec dst, vec src, memory mem) %{
21590 predicate((UseAVX > 0) &&
21591 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21592 match(Set dst (AndV src (LoadVector mem)));
21593 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21594 ins_encode %{
21595 int vlen_enc = vector_length_encoding(this);
21596 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21597 %}
21598 ins_pipe( pipe_slow );
21599 %}
21600
21601 // --------------------------------- OR ---------------------------------------
21602
21603 instruct vor(vec dst, vec src) %{
21604 predicate(UseAVX == 0);
21605 match(Set dst (OrV dst src));
21606 format %{ "por $dst,$src\t! or vectors" %}
21607 ins_encode %{
21608 __ por($dst$$XMMRegister, $src$$XMMRegister);
21609 %}
21610 ins_pipe( pipe_slow );
21611 %}
21612
21613 instruct vor_reg(vec dst, vec src1, vec src2) %{
21614 predicate(UseAVX > 0);
21615 match(Set dst (OrV src1 src2));
21616 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21617 ins_encode %{
21618 int vlen_enc = vector_length_encoding(this);
21619 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21620 %}
21621 ins_pipe( pipe_slow );
21622 %}
21623
21624 instruct vor_mem(vec dst, vec src, memory mem) %{
21625 predicate((UseAVX > 0) &&
21626 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21627 match(Set dst (OrV src (LoadVector mem)));
21628 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21629 ins_encode %{
21630 int vlen_enc = vector_length_encoding(this);
21631 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21632 %}
21633 ins_pipe( pipe_slow );
21634 %}
21635
21636 // --------------------------------- XOR --------------------------------------
21637
21638 instruct vxor(vec dst, vec src) %{
21639 predicate(UseAVX == 0);
21640 match(Set dst (XorV dst src));
21641 format %{ "pxor $dst,$src\t! xor vectors" %}
21642 ins_encode %{
21643 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21644 %}
21645 ins_pipe( pipe_slow );
21646 %}
21647
21648 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21649 predicate(UseAVX > 0);
21650 match(Set dst (XorV src1 src2));
21651 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21652 ins_encode %{
21653 int vlen_enc = vector_length_encoding(this);
21654 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21655 %}
21656 ins_pipe( pipe_slow );
21657 %}
21658
21659 instruct vxor_mem(vec dst, vec src, memory mem) %{
21660 predicate((UseAVX > 0) &&
21661 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21662 match(Set dst (XorV src (LoadVector mem)));
21663 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21664 ins_encode %{
21665 int vlen_enc = vector_length_encoding(this);
21666 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21667 %}
21668 ins_pipe( pipe_slow );
21669 %}
21670
21671 // --------------------------------- VectorCast --------------------------------------
21672
21673 instruct vcastBtoX(vec dst, vec src) %{
21674 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21675 match(Set dst (VectorCastB2X src));
21676 format %{ "vector_cast_b2x $dst,$src\t!" %}
21677 ins_encode %{
21678 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21679 int vlen_enc = vector_length_encoding(this);
21680 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21681 %}
21682 ins_pipe( pipe_slow );
21683 %}
21684
21685 instruct vcastBtoD(legVec dst, legVec src) %{
21686 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21687 match(Set dst (VectorCastB2X src));
21688 format %{ "vector_cast_b2x $dst,$src\t!" %}
21689 ins_encode %{
21690 int vlen_enc = vector_length_encoding(this);
21691 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21692 %}
21693 ins_pipe( pipe_slow );
21694 %}
21695
21696 instruct castStoX(vec dst, vec src) %{
21697 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21698 Matcher::vector_length(n->in(1)) <= 8 && // src
21699 Matcher::vector_element_basic_type(n) == T_BYTE);
21700 match(Set dst (VectorCastS2X src));
21701 format %{ "vector_cast_s2x $dst,$src" %}
21702 ins_encode %{
21703 assert(UseAVX > 0, "required");
21704
21705 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21706 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21707 %}
21708 ins_pipe( pipe_slow );
21709 %}
21710
21711 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21712 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21713 Matcher::vector_length(n->in(1)) == 16 && // src
21714 Matcher::vector_element_basic_type(n) == T_BYTE);
21715 effect(TEMP dst, TEMP vtmp);
21716 match(Set dst (VectorCastS2X src));
21717 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21718 ins_encode %{
21719 assert(UseAVX > 0, "required");
21720
21721 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21722 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21723 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21724 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21725 %}
21726 ins_pipe( pipe_slow );
21727 %}
21728
21729 instruct vcastStoX_evex(vec dst, vec src) %{
21730 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21731 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21732 match(Set dst (VectorCastS2X src));
21733 format %{ "vector_cast_s2x $dst,$src\t!" %}
21734 ins_encode %{
21735 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21736 int src_vlen_enc = vector_length_encoding(this, $src);
21737 int vlen_enc = vector_length_encoding(this);
21738 switch (to_elem_bt) {
21739 case T_BYTE:
21740 if (!VM_Version::supports_avx512vl()) {
21741 vlen_enc = Assembler::AVX_512bit;
21742 }
21743 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21744 break;
21745 case T_INT:
21746 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21747 break;
21748 case T_FLOAT:
21749 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21750 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21751 break;
21752 case T_LONG:
21753 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21754 break;
21755 case T_DOUBLE: {
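        // Widen shorts to ints at half the destination width, then convert the ints
        // to doubles at full width (a double is twice as wide as an int).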
21756 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21757 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21758 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21759 break;
21760 }
21761 default:
21762 ShouldNotReachHere();
21763 }
21764 %}
21765 ins_pipe( pipe_slow );
21766 %}
21767
21768 instruct castItoX(vec dst, vec src) %{
21769 predicate(UseAVX <= 2 &&
21770 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21771 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21772 match(Set dst (VectorCastI2X src));
21773 format %{ "vector_cast_i2x $dst,$src" %}
21774 ins_encode %{
21775 assert(UseAVX > 0, "required");
21776
21777 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21778 int vlen_enc = vector_length_encoding(this, $src);
21779
21780 if (to_elem_bt == T_BYTE) {
21781 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21782 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21783 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21784 } else {
21785 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21786 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21787 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21788 }
21789 %}
21790 ins_pipe( pipe_slow );
21791 %}
21792
21793 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21794 predicate(UseAVX <= 2 &&
21795 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21796 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21797 match(Set dst (VectorCastI2X src));
21798 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21799 effect(TEMP dst, TEMP vtmp);
21800 ins_encode %{
21801 assert(UseAVX > 0, "required");
21802
21803 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21804 int vlen_enc = vector_length_encoding(this, $src);
21805
21806 if (to_elem_bt == T_BYTE) {
21807 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21808 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21809 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21810 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21811 } else {
21812 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21813 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21814 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21815 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21816 }
21817 %}
21818 ins_pipe( pipe_slow );
21819 %}
21820
21821 instruct vcastItoX_evex(vec dst, vec src) %{
21822 predicate(UseAVX > 2 ||
21823 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21824 match(Set dst (VectorCastI2X src));
21825 format %{ "vector_cast_i2x $dst,$src\t!" %}
21826 ins_encode %{
21827 assert(UseAVX > 0, "required");
21828
21829 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21830 int src_vlen_enc = vector_length_encoding(this, $src);
21831 int dst_vlen_enc = vector_length_encoding(this);
21832 switch (dst_elem_bt) {
21833 case T_BYTE:
21834 if (!VM_Version::supports_avx512vl()) {
21835 src_vlen_enc = Assembler::AVX_512bit;
21836 }
21837 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21838 break;
21839 case T_SHORT:
21840 if (!VM_Version::supports_avx512vl()) {
21841 src_vlen_enc = Assembler::AVX_512bit;
21842 }
21843 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21844 break;
21845 case T_FLOAT:
21846 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21847 break;
21848 case T_LONG:
21849 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21850 break;
21851 case T_DOUBLE:
21852 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21853 break;
21854 default:
21855 ShouldNotReachHere();
21856 }
21857 %}
21858 ins_pipe( pipe_slow );
21859 %}
21860
21861 instruct vcastLtoBS(vec dst, vec src) %{
21862 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21863 UseAVX <= 2);
21864 match(Set dst (VectorCastL2X src));
21865 format %{ "vector_cast_l2x $dst,$src" %}
21866 ins_encode %{
21867 assert(UseAVX > 0, "required");
21868
21869 int vlen = Matcher::vector_length_in_bytes(this, $src);
21870 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21871 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21872 : ExternalAddress(vector_int_to_short_mask());
21873 if (vlen <= 16) {
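      // Shuffle immediate 8 (0b00001000) selects dwords {0,2}, i.e. the low 32 bits
      // of each long, into the low half before masking and packing.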
21874 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21875 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21876 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21877 } else {
21878 assert(vlen <= 32, "required");
21879 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21880 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21881 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21882 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21883 }
21884 if (to_elem_bt == T_BYTE) {
21885 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21886 }
21887 %}
21888 ins_pipe( pipe_slow );
21889 %}
21890
21891 instruct vcastLtoX_evex(vec dst, vec src) %{
21892 predicate(UseAVX > 2 ||
21893 (Matcher::vector_element_basic_type(n) == T_INT ||
21894 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21895 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21896 match(Set dst (VectorCastL2X src));
21897 format %{ "vector_cast_l2x $dst,$src\t!" %}
21898 ins_encode %{
21899 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21900 int vlen = Matcher::vector_length_in_bytes(this, $src);
21901 int vlen_enc = vector_length_encoding(this, $src);
21902 switch (to_elem_bt) {
21903 case T_BYTE:
21904 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21905 vlen_enc = Assembler::AVX_512bit;
21906 }
21907 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21908 break;
21909 case T_SHORT:
21910 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21911 vlen_enc = Assembler::AVX_512bit;
21912 }
21913 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21914 break;
21915 case T_INT:
21916 if (vlen == 8) {
21917 if ($dst$$XMMRegister != $src$$XMMRegister) {
21918 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21919 }
21920 } else if (vlen == 16) {
21921 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21922 } else if (vlen == 32) {
21923 if (UseAVX > 2) {
21924 if (!VM_Version::supports_avx512vl()) {
21925 vlen_enc = Assembler::AVX_512bit;
21926 }
21927 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21928 } else {
21929 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21930 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21931 }
21932 } else { // vlen == 64
21933 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21934 }
21935 break;
21936 case T_FLOAT:
21937 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21938 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21939 break;
21940 case T_DOUBLE:
21941 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21942 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21943 break;
21944
21945 default: assert(false, "%s", type2name(to_elem_bt));
21946 }
21947 %}
21948 ins_pipe( pipe_slow );
21949 %}
21950
21951 instruct vcastFtoD_reg(vec dst, vec src) %{
21952 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21953 match(Set dst (VectorCastF2X src));
21954 format %{ "vector_cast_f2d $dst,$src\t!" %}
21955 ins_encode %{
21956 int vlen_enc = vector_length_encoding(this);
21957 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21958 %}
21959 ins_pipe( pipe_slow );
21960 %}
21961
21962
21963 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21964 predicate(!VM_Version::supports_avx10_2() &&
21965 !VM_Version::supports_avx512vl() &&
21966 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21967 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21968 is_integral_type(Matcher::vector_element_basic_type(n)));
21969 match(Set dst (VectorCastF2X src));
21970 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21971 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21972 ins_encode %{
21973 int vlen_enc = vector_length_encoding(this, $src);
21974 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register that was needed to load addresses
    // wider than 32 bits in register-indirect addressing mode: stub constants live in the
    // code cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to
    // raise that cap, but a code cache larger than 2G is unrealistic in practice; keeping
    // the cap saves a temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
21982 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21983 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21984 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21985 %}
21986 ins_pipe( pipe_slow );
21987 %}
21988
21989 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21990 predicate(!VM_Version::supports_avx10_2() &&
21991 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21992 is_integral_type(Matcher::vector_element_basic_type(n)));
21993 match(Set dst (VectorCastF2X src));
21994 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21995 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21996 ins_encode %{
21997 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
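    // The signflip constant lets the stub identify lanes where the hardware convert
    // produced the integer-indefinite value (0x80..0), so they can be fixed up to
    // Java cast semantics (NaN becomes 0, out-of-range values saturate).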
21998 if (to_elem_bt == T_LONG) {
21999 int vlen_enc = vector_length_encoding(this);
22000 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22001 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22002 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22003 } else {
22004 int vlen_enc = vector_length_encoding(this, $src);
22005 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22006 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22007 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22008 }
22009 %}
22010 ins_pipe( pipe_slow );
22011 %}
22012
22013 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22014 predicate(VM_Version::supports_avx10_2() &&
22015 is_integral_type(Matcher::vector_element_basic_type(n)));
22016 match(Set dst (VectorCastF2X src));
22017 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22018 ins_encode %{
22019 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22020 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22021 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22022 %}
22023 ins_pipe( pipe_slow );
22024 %}
22025
22026 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22027 predicate(VM_Version::supports_avx10_2() &&
22028 is_integral_type(Matcher::vector_element_basic_type(n)));
22029 match(Set dst (VectorCastF2X (LoadVector src)));
22030 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22031 ins_encode %{
22032 int vlen = Matcher::vector_length(this);
22033 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22034 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22035 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22036 %}
22037 ins_pipe( pipe_slow );
22038 %}
22039
22040 instruct vcastDtoF_reg(vec dst, vec src) %{
22041 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22042 match(Set dst (VectorCastD2X src));
22043 format %{ "vector_cast_d2x $dst,$src\t!" %}
22044 ins_encode %{
22045 int vlen_enc = vector_length_encoding(this, $src);
22046 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22047 %}
22048 ins_pipe( pipe_slow );
22049 %}
22050
22051 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22052 predicate(!VM_Version::supports_avx10_2() &&
22053 !VM_Version::supports_avx512vl() &&
22054 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22055 is_integral_type(Matcher::vector_element_basic_type(n)));
22056 match(Set dst (VectorCastD2X src));
22057 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22058 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22059 ins_encode %{
22060 int vlen_enc = vector_length_encoding(this, $src);
22061 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22062 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22063 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22064 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22065 %}
22066 ins_pipe( pipe_slow );
22067 %}
22068
22069 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22070 predicate(!VM_Version::supports_avx10_2() &&
22071 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22072 is_integral_type(Matcher::vector_element_basic_type(n)));
22073 match(Set dst (VectorCastD2X src));
22074 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22075 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22076 ins_encode %{
22077 int vlen_enc = vector_length_encoding(this, $src);
22078 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22079 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22080 ExternalAddress(vector_float_signflip());
22081 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22082 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22083 %}
22084 ins_pipe( pipe_slow );
22085 %}
22086
22087 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22088 predicate(VM_Version::supports_avx10_2() &&
22089 is_integral_type(Matcher::vector_element_basic_type(n)));
22090 match(Set dst (VectorCastD2X src));
22091 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22092 ins_encode %{
22093 int vlen_enc = vector_length_encoding(this, $src);
22094 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22095 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22096 %}
22097 ins_pipe( pipe_slow );
22098 %}
22099
22100 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22101 predicate(VM_Version::supports_avx10_2() &&
22102 is_integral_type(Matcher::vector_element_basic_type(n)));
22103 match(Set dst (VectorCastD2X (LoadVector src)));
22104 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22105 ins_encode %{
22106 int vlen = Matcher::vector_length(this);
22107 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22108 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22109 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22110 %}
22111 ins_pipe( pipe_slow );
22112 %}
22113
22114 instruct vucast(vec dst, vec src) %{
22115 match(Set dst (VectorUCastB2X src));
22116 match(Set dst (VectorUCastS2X src));
22117 match(Set dst (VectorUCastI2X src));
22118 format %{ "vector_ucast $dst,$src\t!" %}
22119 ins_encode %{
22120 assert(UseAVX > 0, "required");
22121
22122 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22123 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22124 int vlen_enc = vector_length_encoding(this);
22125 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22126 %}
22127 ins_pipe( pipe_slow );
22128 %}
22129
22130 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22131 predicate(!VM_Version::supports_avx512vl() &&
22132 Matcher::vector_length_in_bytes(n) < 64 &&
22133 Matcher::vector_element_basic_type(n) == T_INT);
22134 match(Set dst (RoundVF src));
22135 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22136 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22137 ins_encode %{
22138 int vlen_enc = vector_length_encoding(this);
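    // MXCSR 0x3F80 masks all FP exceptions and sets rounding control to round toward
    // negative infinity (RC = 01); the EnableX86ECoreOpts value 0x3FBF additionally
    // pre-sets the six exception flag bits.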
22139 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22140 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22141 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22142 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22143 %}
22144 ins_pipe( pipe_slow );
22145 %}
22146
22147 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22148 predicate((VM_Version::supports_avx512vl() ||
22149 Matcher::vector_length_in_bytes(n) == 64) &&
22150 Matcher::vector_element_basic_type(n) == T_INT);
22151 match(Set dst (RoundVF src));
22152 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22153 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22154 ins_encode %{
22155 int vlen_enc = vector_length_encoding(this);
22156 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22157 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22158 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22159 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22160 %}
22161 ins_pipe( pipe_slow );
22162 %}
22163
22164 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22165 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22166 match(Set dst (RoundVD src));
22167 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22168 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22169 ins_encode %{
22170 int vlen_enc = vector_length_encoding(this);
22171 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22172 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22173 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22174 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22175 %}
22176 ins_pipe( pipe_slow );
22177 %}
22178
22179 // --------------------------------- VectorMaskCmp --------------------------------------
22180
22181 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22182 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22183 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22184 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22185 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22186 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22187 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22188 ins_encode %{
22189 int vlen_enc = vector_length_encoding(this, $src1);
22190 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22191 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22192 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22193 } else {
22194 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22195 }
22196 %}
22197 ins_pipe( pipe_slow );
22198 %}
22199
22200 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22201 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22202 n->bottom_type()->isa_vectmask() == nullptr &&
22203 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22204 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22205 effect(TEMP ktmp);
22206 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22207 ins_encode %{
22208 int vlen_enc = Assembler::AVX_512bit;
22209 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22210 KRegister mask = k0; // The comparison itself is not being masked.
22211 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22212 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22213 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22214 } else {
22215 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22216 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22217 }
22218 %}
22219 ins_pipe( pipe_slow );
22220 %}
22221
22222 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22223 predicate(n->bottom_type()->isa_vectmask() &&
22224 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22225 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22226 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22227 ins_encode %{
22228 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22229 int vlen_enc = vector_length_encoding(this, $src1);
22230 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22231 KRegister mask = k0; // The comparison itself is not being masked.
22232 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22233 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22234 } else {
22235 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22236 }
22237 %}
22238 ins_pipe( pipe_slow );
22239 %}
22240
22241 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22242 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22243 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22244 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22245 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22246 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22247 (n->in(2)->get_int() == BoolTest::eq ||
22248 n->in(2)->get_int() == BoolTest::lt ||
22249 n->in(2)->get_int() == BoolTest::gt)); // cond
22250 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22251 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22252 ins_encode %{
22253 int vlen_enc = vector_length_encoding(this, $src1);
22254 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22255 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22256 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22257 %}
22258 ins_pipe( pipe_slow );
22259 %}
22260
22261 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22262 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22263 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22264 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22265 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22266 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22267 (n->in(2)->get_int() == BoolTest::ne ||
22268 n->in(2)->get_int() == BoolTest::le ||
22269 n->in(2)->get_int() == BoolTest::ge)); // cond
22270 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22271 effect(TEMP dst, TEMP xtmp);
22272 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22273 ins_encode %{
22274 int vlen_enc = vector_length_encoding(this, $src1);
22275 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22276 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22277 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22278 %}
22279 ins_pipe( pipe_slow );
22280 %}
22281
22282 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22283 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22284 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22285 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22286 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22287 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22288 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22289 effect(TEMP dst, TEMP xtmp);
22290 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22291 ins_encode %{
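    // Unsigned compare is emulated: XOR both operands with the per-lane sign bit
    // (flip_bit), after which a signed compare yields the unsigned ordering.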
22292 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22293 int vlen_enc = vector_length_encoding(this, $src1);
22294 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22295 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22296
22297 if (vlen_enc == Assembler::AVX_128bit) {
22298 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22299 } else {
22300 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22301 }
22302 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22303 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22304 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22305 %}
22306 ins_pipe( pipe_slow );
22307 %}
22308
22309 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22310 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22311 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22312 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22313 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22314 effect(TEMP ktmp);
22315 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22316 ins_encode %{
22317 assert(UseAVX > 2, "required");
22318
22319 int vlen_enc = vector_length_encoding(this, $src1);
22320 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22321 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22322 KRegister mask = k0; // The comparison itself is not being masked.
22323 bool merge = false;
22324 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22325
22326 switch (src1_elem_bt) {
22327 case T_INT: {
22328 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22329 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22330 break;
22331 }
22332 case T_LONG: {
22333 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22334 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22335 break;
22336 }
22337 default: assert(false, "%s", type2name(src1_elem_bt));
22338 }
22339 %}
22340 ins_pipe( pipe_slow );
22341 %}
22342
22343
22344 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22345 predicate(n->bottom_type()->isa_vectmask() &&
22346 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22347 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22349 ins_encode %{
22350 assert(UseAVX > 2, "required");
22351 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22352
22353 int vlen_enc = vector_length_encoding(this, $src1);
22354 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22355 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22356 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22357
    // Dispatch on the source element type; k0 means the compare itself is unmasked.
22359 switch (src1_elem_bt) {
22360 case T_BYTE: {
22361 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22362 break;
22363 }
22364 case T_SHORT: {
22365 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22366 break;
22367 }
22368 case T_INT: {
22369 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22370 break;
22371 }
22372 case T_LONG: {
22373 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22374 break;
22375 }
22376 default: assert(false, "%s", type2name(src1_elem_bt));
22377 }
22378 %}
22379 ins_pipe( pipe_slow );
22380 %}
22381
22382 // Extract
22383
22384 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22385 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22386 match(Set dst (ExtractI src idx));
22387 match(Set dst (ExtractS src idx));
22388 match(Set dst (ExtractB src idx));
22389 format %{ "extractI $dst,$src,$idx\t!" %}
22390 ins_encode %{
22391 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22392
22393 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22394 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22395 %}
22396 ins_pipe( pipe_slow );
22397 %}
22398
22399 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22400 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22401 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22402 match(Set dst (ExtractI src idx));
22403 match(Set dst (ExtractS src idx));
22404 match(Set dst (ExtractB src idx));
22405 effect(TEMP vtmp);
22406 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22407 ins_encode %{
22408 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22409
22410 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
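    // Two steps for 256/512-bit sources: isolate the 128-bit lane containing element
    // $idx into $vtmp, then extract the element from that lane.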
22411 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22412 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22413 %}
22414 ins_pipe( pipe_slow );
22415 %}
22416
22417 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22418 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22419 match(Set dst (ExtractL src idx));
22420 format %{ "extractL $dst,$src,$idx\t!" %}
22421 ins_encode %{
22422 assert(UseSSE >= 4, "required");
22423 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22424
22425 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22426 %}
22427 ins_pipe( pipe_slow );
22428 %}
22429
22430 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22431 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22432 Matcher::vector_length(n->in(1)) == 8); // src
22433 match(Set dst (ExtractL src idx));
22434 effect(TEMP vtmp);
22435 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22436 ins_encode %{
22437 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22438
22439 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22440 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22441 %}
22442 ins_pipe( pipe_slow );
22443 %}
22444
22445 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22446 predicate(Matcher::vector_length(n->in(1)) <= 4);
22447 match(Set dst (ExtractF src idx));
22448 effect(TEMP dst, TEMP vtmp);
22449 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22450 ins_encode %{
22451 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22452
22453 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22454 %}
22455 ins_pipe( pipe_slow );
22456 %}
22457
22458 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22459 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22460 Matcher::vector_length(n->in(1)/*src*/) == 16);
22461 match(Set dst (ExtractF src idx));
22462 effect(TEMP vtmp);
22463 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22464 ins_encode %{
22465 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22466
22467 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22468 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22469 %}
22470 ins_pipe( pipe_slow );
22471 %}
22472
22473 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22474 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22475 match(Set dst (ExtractD src idx));
22476 format %{ "extractD $dst,$src,$idx\t!" %}
22477 ins_encode %{
22478 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22479
22480 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22481 %}
22482 ins_pipe( pipe_slow );
22483 %}
22484
22485 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22486 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22487 Matcher::vector_length(n->in(1)) == 8); // src
22488 match(Set dst (ExtractD src idx));
22489 effect(TEMP vtmp);
22490 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22491 ins_encode %{
22492 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22493
22494 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22495 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22496 %}
22497 ins_pipe( pipe_slow );
22498 %}
22499
22500 // --------------------------------- Vector Blend --------------------------------------
22501
22502 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22503 predicate(UseAVX == 0);
22504 match(Set dst (VectorBlend (Binary dst src) mask));
22505 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22506 effect(TEMP tmp);
22507 ins_encode %{
22508 assert(UseSSE >= 4, "required");
22509
22510 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22511 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22512 }
22513 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22514 %}
22515 ins_pipe( pipe_slow );
22516 %}
22517
22518 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22519 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22520 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22521 Matcher::vector_length_in_bytes(n) <= 32 &&
22522 is_integral_type(Matcher::vector_element_basic_type(n)));
22523 match(Set dst (VectorBlend (Binary src1 src2) mask));
22524 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22525 ins_encode %{
22526 int vlen_enc = vector_length_encoding(this);
22527 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22528 %}
22529 ins_pipe( pipe_slow );
22530 %}
22531
22532 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22533 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22534 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22535 Matcher::vector_length_in_bytes(n) <= 32 &&
22536 !is_integral_type(Matcher::vector_element_basic_type(n)));
22537 match(Set dst (VectorBlend (Binary src1 src2) mask));
22538 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22539 ins_encode %{
22540 int vlen_enc = vector_length_encoding(this);
22541 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22542 %}
22543 ins_pipe( pipe_slow );
22544 %}
22545
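// On E-core-optimized targets the variable byte blend is replaced by its
// bitwise equivalent, dst = (src2 & mask) | (src1 & ~mask); with 0/-1 mask
// lanes this still selects whole elements. Illustrative per-byte sketch:
//   mask byte 0xFF -> picks the src2 byte, mask byte 0x00 -> picks the src1 byte.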
22546 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22547 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22548 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22549 Matcher::vector_length_in_bytes(n) <= 32);
22550 match(Set dst (VectorBlend (Binary src1 src2) mask));
22551 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22552 effect(TEMP vtmp, TEMP dst);
22553 ins_encode %{
22554 int vlen_enc = vector_length_encoding(this);
22555 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22556 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22557 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22558 %}
22559 ins_pipe( pipe_slow );
22560 %}
22561
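// For 512-bit vectors the mask vector is first turned into an opmask register
// by comparing it for equality against an all-bits-set constant; evpblend then
// performs a merging blend under that k-mask.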
22562 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22563 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22564 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22565 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22567 effect(TEMP ktmp);
22568 ins_encode %{
22569 int vlen_enc = Assembler::AVX_512bit;
22570 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22571 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22572 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22573 %}
22574 ins_pipe( pipe_slow );
22575 %}
22576
22577
22578 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22579 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22580 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22581 VM_Version::supports_avx512bw()));
22582 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask" %}
22584 ins_encode %{
22585 int vlen_enc = vector_length_encoding(this);
22586 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22587 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22588 %}
22589 ins_pipe( pipe_slow );
22590 %}
22591
22592 // --------------------------------- ABS --------------------------------------
22593 // a = |a|
22594 instruct vabsB_reg(vec dst, vec src) %{
22595 match(Set dst (AbsVB src));
22596 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22597 ins_encode %{
22598 uint vlen = Matcher::vector_length(this);
22599 if (vlen <= 16) {
22600 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22601 } else {
22602 int vlen_enc = vector_length_encoding(this);
22603 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22604 }
22605 %}
22606 ins_pipe( pipe_slow );
22607 %}
22608
22609 instruct vabsS_reg(vec dst, vec src) %{
22610 match(Set dst (AbsVS src));
22611 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22612 ins_encode %{
22613 uint vlen = Matcher::vector_length(this);
22614 if (vlen <= 8) {
22615 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22616 } else {
22617 int vlen_enc = vector_length_encoding(this);
22618 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619 }
22620 %}
22621 ins_pipe( pipe_slow );
22622 %}
22623
22624 instruct vabsI_reg(vec dst, vec src) %{
22625 match(Set dst (AbsVI src));
22626 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22627 ins_encode %{
22628 uint vlen = Matcher::vector_length(this);
22629 if (vlen <= 4) {
22630 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22631 } else {
22632 int vlen_enc = vector_length_encoding(this);
22633 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22634 }
22635 %}
22636 ins_pipe( pipe_slow );
22637 %}
22638
22639 instruct vabsL_reg(vec dst, vec src) %{
22640 match(Set dst (AbsVL src));
22641 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22642 ins_encode %{
22643 assert(UseAVX > 2, "required");
22644 int vlen_enc = vector_length_encoding(this);
22645 if (!VM_Version::supports_avx512vl()) {
22646 vlen_enc = Assembler::AVX_512bit;
22647 }
22648 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22649 %}
22650 ins_pipe( pipe_slow );
22651 %}
22652
22653 // --------------------------------- ABSNEG --------------------------------------
22654
22655 instruct vabsnegF(vec dst, vec src) %{
22656 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22657 match(Set dst (AbsVF src));
22658 match(Set dst (NegVF src));
22659 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22660 ins_cost(150);
22661 ins_encode %{
22662 int opcode = this->ideal_Opcode();
22663 int vlen = Matcher::vector_length(this);
22664 if (vlen == 2) {
22665 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22666 } else {
22667 assert(vlen == 8 || vlen == 16, "required");
22668 int vlen_enc = vector_length_encoding(this);
22669 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22670 }
22671 %}
22672 ins_pipe( pipe_slow );
22673 %}
22674
22675 instruct vabsneg4F(vec dst) %{
22676 predicate(Matcher::vector_length(n) == 4);
22677 match(Set dst (AbsVF dst));
22678 match(Set dst (NegVF dst));
22679 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22680 ins_cost(150);
22681 ins_encode %{
22682 int opcode = this->ideal_Opcode();
22683 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22684 %}
22685 ins_pipe( pipe_slow );
22686 %}
22687
22688 instruct vabsnegD(vec dst, vec src) %{
22689 match(Set dst (AbsVD src));
22690 match(Set dst (NegVD src));
22691 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22692 ins_encode %{
22693 int opcode = this->ideal_Opcode();
22694 uint vlen = Matcher::vector_length(this);
22695 if (vlen == 2) {
22696 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22697 } else {
22698 int vlen_enc = vector_length_encoding(this);
22699 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22700 }
22701 %}
22702 ins_pipe( pipe_slow );
22703 %}
22704
22705 //------------------------------------- VectorTest --------------------------------------------
22706
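// VectorTest sets the condition flags from a pair of vector (or mask) inputs
// so the matched Bool node can test "all lanes set" (BoolTest::overflow) or
// "any lane set" (BoolTest::ne), as encoded in the ktest predicates below.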
22707 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22708 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22709 match(Set cr (VectorTest src1 src2));
22710 effect(TEMP vtmp);
22711 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22712 ins_encode %{
22713 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22714 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22715 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22716 %}
22717 ins_pipe( pipe_slow );
22718 %}
22719
22720 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22721 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22722 match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2" %}
22724 ins_encode %{
22725 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22726 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22727 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22728 %}
22729 ins_pipe( pipe_slow );
22730 %}
22731
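// For short k-masks the test is carried out in a GPR. Illustrative example for
// masklen == 4: both the AND and the CMP use (1 << 4) - 1 == 0xF, so the
// comparison signals equality exactly when all four mask bits are set.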
22732 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22733 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22734 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22735 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22736 match(Set cr (VectorTest src1 src2));
22737 effect(TEMP tmp);
22738 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22739 ins_encode %{
22740 uint masklen = Matcher::vector_length(this, $src1);
22741 __ kmovwl($tmp$$Register, $src1$$KRegister);
22742 __ andl($tmp$$Register, (1 << masklen) - 1);
22743 __ cmpl($tmp$$Register, (1 << masklen) - 1);
22744 %}
22745 ins_pipe( pipe_slow );
22746 %}
22747
22748 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22749 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22750 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22751 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22752 match(Set cr (VectorTest src1 src2));
22753 effect(TEMP tmp);
22754 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22755 ins_encode %{
22756 uint masklen = Matcher::vector_length(this, $src1);
22757 __ kmovwl($tmp$$Register, $src1$$KRegister);
22758 __ andl($tmp$$Register, (1 << masklen) - 1);
22759 %}
22760 ins_pipe( pipe_slow );
22761 %}
22762
22763 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22764 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22765 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22766 match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2" %}
22768 ins_encode %{
22769 uint masklen = Matcher::vector_length(this, $src1);
22770 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22771 %}
22772 ins_pipe( pipe_slow );
22773 %}
22774
22775 //------------------------------------- LoadMask --------------------------------------------
22776
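// VectorLoadMask widens a vector of boolean bytes (0 or 1 per element) into a
// vector mask with 0/-1 lanes, or directly into an opmask register on AVX-512
// targets where the result type is a vectmask.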
22777 instruct loadMask(legVec dst, legVec src) %{
22778 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22779 match(Set dst (VectorLoadMask src));
22780 effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src" %}
22782 ins_encode %{
22783 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22784 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22785 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22786 %}
22787 ins_pipe( pipe_slow );
22788 %}
22789
22790 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22791 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22792 match(Set dst (VectorLoadMask src));
22793 effect(TEMP xtmp);
22794 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22795 ins_encode %{
22796 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22797 true, Assembler::AVX_512bit);
22798 %}
22799 ins_pipe( pipe_slow );
22800 %}
22801
22802 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
22803 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22804 match(Set dst (VectorLoadMask src));
22805 effect(TEMP xtmp);
22806 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22807 ins_encode %{
22808 int vlen_enc = vector_length_encoding(in(1));
22809 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22810 false, vlen_enc);
22811 %}
22812 ins_pipe( pipe_slow );
22813 %}
22814
22815 //------------------------------------- StoreMask --------------------------------------------
22816
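// VectorStoreMask is the inverse of VectorLoadMask: it narrows 0/-1 mask lanes
// back down to boolean bytes. The pabsb/vpabsb at the end of each pattern maps
// a lane value of -1 to 1 while leaving 0 unchanged.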
22817 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22818 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22819 match(Set dst (VectorStoreMask src size));
22820 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22821 ins_encode %{
22822 int vlen = Matcher::vector_length(this);
22823 if (vlen <= 16 && UseAVX <= 2) {
22824 assert(UseSSE >= 3, "required");
22825 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22826 } else {
22827 assert(UseAVX > 0, "required");
22828 int src_vlen_enc = vector_length_encoding(this, $src);
22829 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22830 }
22831 %}
22832 ins_pipe( pipe_slow );
22833 %}
22834
22835 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22836 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22837 match(Set dst (VectorStoreMask src size));
22838 effect(TEMP_DEF dst, TEMP xtmp);
22839 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22840 ins_encode %{
22841 int vlen_enc = Assembler::AVX_128bit;
22842 int vlen = Matcher::vector_length(this);
22843 if (vlen <= 8) {
22844 assert(UseSSE >= 3, "required");
22845 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22846 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22847 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22848 } else {
22849 assert(UseAVX > 0, "required");
22850 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22851 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22852 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22853 }
22854 %}
22855 ins_pipe( pipe_slow );
22856 %}
22857
22858 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22859 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22860 match(Set dst (VectorStoreMask src size));
22861 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22862 effect(TEMP_DEF dst, TEMP xtmp);
22863 ins_encode %{
22864 int vlen_enc = Assembler::AVX_128bit;
22865 int vlen = Matcher::vector_length(this);
22866 if (vlen <= 4) {
22867 assert(UseSSE >= 3, "required");
22868 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22869 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22870 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22871 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22872 } else {
22873 assert(UseAVX > 0, "required");
22874 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22875 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22876 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22877 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22878 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22879 }
22880 %}
22881 ins_pipe( pipe_slow );
22882 %}
22883
22884 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22885 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22886 match(Set dst (VectorStoreMask src size));
22887 effect(TEMP_DEF dst, TEMP xtmp);
22888 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22889 ins_encode %{
22890 assert(UseSSE >= 3, "required");
22891 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22892 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22893 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22894 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22895 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22896 %}
22897 ins_pipe( pipe_slow );
22898 %}
22899
22900 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22901 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22902 match(Set dst (VectorStoreMask src size));
22903 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22904 effect(TEMP_DEF dst, TEMP vtmp);
22905 ins_encode %{
22906 int vlen_enc = Assembler::AVX_128bit;
22907 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22908 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22909 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22910 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22911 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22912 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22913 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22914 %}
22915 ins_pipe( pipe_slow );
22916 %}
22917
22918 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22919 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22920 match(Set dst (VectorStoreMask src size));
22921 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22922 ins_encode %{
22923 int src_vlen_enc = vector_length_encoding(this, $src);
22924 int dst_vlen_enc = vector_length_encoding(this);
22925 if (!VM_Version::supports_avx512vl()) {
22926 src_vlen_enc = Assembler::AVX_512bit;
22927 }
22928 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22929 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22930 %}
22931 ins_pipe( pipe_slow );
22932 %}
22933
22934 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22935 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22936 match(Set dst (VectorStoreMask src size));
22937 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22938 ins_encode %{
22939 int src_vlen_enc = vector_length_encoding(this, $src);
22940 int dst_vlen_enc = vector_length_encoding(this);
22941 if (!VM_Version::supports_avx512vl()) {
22942 src_vlen_enc = Assembler::AVX_512bit;
22943 }
22944 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22945 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22946 %}
22947 ins_pipe( pipe_slow );
22948 %}
22949
22950 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22951 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22952 match(Set dst (VectorStoreMask mask size));
22953 effect(TEMP_DEF dst);
22954 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22955 ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
22957 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22958 false, Assembler::AVX_512bit, noreg);
22959 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22960 %}
22961 ins_pipe( pipe_slow );
22962 %}
22963
22964 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22965 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22966 match(Set dst (VectorStoreMask mask size));
22967 effect(TEMP_DEF dst);
22968 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22969 ins_encode %{
22970 int dst_vlen_enc = vector_length_encoding(this);
22971 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22972 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22973 %}
22974 ins_pipe( pipe_slow );
22975 %}
22976
22977 instruct vmaskcast_evex(kReg dst) %{
22978 match(Set dst (VectorMaskCast dst));
22979 ins_cost(0);
22980 format %{ "vector_mask_cast $dst" %}
22981 ins_encode %{
22982 // empty
22983 %}
22984 ins_pipe(empty);
22985 %}
22986
22987 instruct vmaskcast(vec dst) %{
22988 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22989 match(Set dst (VectorMaskCast dst));
22990 ins_cost(0);
22991 format %{ "vector_mask_cast $dst" %}
22992 ins_encode %{
22993 // empty
22994 %}
22995 ins_pipe(empty);
22996 %}
22997
22998 instruct vmaskcast_avx(vec dst, vec src) %{
22999 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23000 match(Set dst (VectorMaskCast src));
23001 format %{ "vector_mask_cast $dst, $src" %}
23002 ins_encode %{
23003 int vlen = Matcher::vector_length(this);
23004 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23005 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23006 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23007 %}
23008 ins_pipe(pipe_slow);
23009 %}
23010
23011 //-------------------------------- Load Iota Indices ----------------------------------
23012
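// VectorLoadConst with a zero immediate produces the iota vector [0, 1, 2, ...].
// PopulateIndex builds [src1, src1+1, src1+2, ...] (the stride $src2 is asserted
// to be 1) by broadcasting src1 and adding the iota constant to it.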
23013 instruct loadIotaIndices(vec dst, immI_0 src) %{
23014 match(Set dst (VectorLoadConst src));
23015 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23016 ins_encode %{
23017 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23018 BasicType bt = Matcher::vector_element_basic_type(this);
23019 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23020 %}
23021 ins_pipe( pipe_slow );
23022 %}
23023
23024 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23025 match(Set dst (PopulateIndex src1 src2));
23026 effect(TEMP dst, TEMP vtmp);
23027 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23028 ins_encode %{
23029 assert($src2$$constant == 1, "required");
23030 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23031 int vlen_enc = vector_length_encoding(this);
23032 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23033 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23034 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23035 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23036 %}
23037 ins_pipe( pipe_slow );
23038 %}
23039
23040 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23041 match(Set dst (PopulateIndex src1 src2));
23042 effect(TEMP dst, TEMP vtmp);
23043 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23044 ins_encode %{
23045 assert($src2$$constant == 1, "required");
23046 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23047 int vlen_enc = vector_length_encoding(this);
23048 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23049 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23050 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23051 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23052 %}
23053 ins_pipe( pipe_slow );
23054 %}
23055
23056 //-------------------------------- Rearrange ----------------------------------
23057
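// VectorRearrange permutes elements under an index vector: dst[i] = src[shuffle[i]].
// VectorLoadShuffle converts the Java index vector into whatever index format the
// underlying shuffle instruction consumes (e.g. byte indices for pshufb).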
23058 // LoadShuffle/Rearrange for Byte
23059 instruct rearrangeB(vec dst, vec shuffle) %{
23060 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23061 Matcher::vector_length(n) < 32);
23062 match(Set dst (VectorRearrange dst shuffle));
23063 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23064 ins_encode %{
23065 assert(UseSSE >= 4, "required");
23066 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23067 %}
23068 ins_pipe( pipe_slow );
23069 %}
23070
23071 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23072 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23073 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23074 match(Set dst (VectorRearrange src shuffle));
23075 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23076 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23077 ins_encode %{
23078 assert(UseAVX >= 2, "required");
23079 // Swap src into vtmp1
23080 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23082 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23084 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23086 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23087 // Perform the blend
23088 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23089 %}
23090 ins_pipe( pipe_slow );
23091 %}
23092
23093
23094 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23095 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23096 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23097 match(Set dst (VectorRearrange src shuffle));
23098 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23100 ins_encode %{
23101 int vlen_enc = vector_length_encoding(this);
23102 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23103 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23104 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23105 %}
23106 ins_pipe( pipe_slow );
23107 %}
23108
23109 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23110 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23111 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23112 match(Set dst (VectorRearrange src shuffle));
23113 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23114 ins_encode %{
23115 int vlen_enc = vector_length_encoding(this);
23116 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23117 %}
23118 ins_pipe( pipe_slow );
23119 %}
23120
23121 // LoadShuffle/Rearrange for Short
23122
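// Without AVX-512BW there is no 16-bit shuffle, so each short index s is
// expanded into the byte-index pair (2s, 2s+1). Illustrative sketch for the
// short shuffle [2, 0, 3, 1]: the generated byte mask is [4,5, 0,1, 6,7, 2,3].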
23123 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23124 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23125 !VM_Version::supports_avx512bw());
23126 match(Set dst (VectorLoadShuffle src));
23127 effect(TEMP dst, TEMP vtmp);
23128 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23129 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
23132 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23133 if (UseAVX == 0) {
23134 assert(vlen_in_bytes <= 16, "required");
23135 // Multiply each shuffle by two to get byte index
23136 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23137 __ psllw($vtmp$$XMMRegister, 1);
23138
23139 // Duplicate to create 2 copies of byte index
23140 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23141 __ psllw($dst$$XMMRegister, 8);
23142 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23143
23144 // Add one to get alternate byte index
23145 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23146 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23147 } else {
23148 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23149 int vlen_enc = vector_length_encoding(this);
23150 // Multiply each shuffle by two to get byte index
23151 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23152
23153 // Duplicate to create 2 copies of byte index
23154 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23155 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23156
23157 // Add one to get alternate byte index
23158 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23159 }
23160 %}
23161 ins_pipe( pipe_slow );
23162 %}
23163
23164 instruct rearrangeS(vec dst, vec shuffle) %{
23165 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23166 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23167 match(Set dst (VectorRearrange dst shuffle));
23168 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23169 ins_encode %{
23170 assert(UseSSE >= 4, "required");
23171 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23172 %}
23173 ins_pipe( pipe_slow );
23174 %}
23175
23176 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23177 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23178 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23179 match(Set dst (VectorRearrange src shuffle));
23180 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23181 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23182 ins_encode %{
23183 assert(UseAVX >= 2, "required");
23184 // Swap src into vtmp1
23185 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23187 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23189 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane in the shuffle
23191 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23192 // Perform the blend
23193 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23194 %}
23195 ins_pipe( pipe_slow );
23196 %}
23197
23198 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23199 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23200 VM_Version::supports_avx512bw());
23201 match(Set dst (VectorRearrange src shuffle));
23202 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23203 ins_encode %{
23204 int vlen_enc = vector_length_encoding(this);
23205 if (!VM_Version::supports_avx512vl()) {
23206 vlen_enc = Assembler::AVX_512bit;
23207 }
23208 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23209 %}
23210 ins_pipe( pipe_slow );
23211 %}
23212
23213 // LoadShuffle/Rearrange for Integer and Float
23214
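// Analogous expansion for 32-bit elements on pre-AVX targets: each int index s
// becomes the byte quadruple (4s, 4s+1, 4s+2, 4s+3). Illustrative sketch: the
// int shuffle [1, 0, 3, 2] yields the byte mask
// [4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11].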
23215 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23216 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23217 Matcher::vector_length(n) == 4 && UseAVX == 0);
23218 match(Set dst (VectorLoadShuffle src));
23219 effect(TEMP dst, TEMP vtmp);
23220 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23221 ins_encode %{
23222 assert(UseSSE >= 4, "required");
23223
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
23226
23227 // Duplicate and multiply each shuffle by 4
23228 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23229 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23230 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23231 __ psllw($vtmp$$XMMRegister, 2);
23232
23233 // Duplicate again to create 4 copies of byte index
23234 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23235 __ psllw($dst$$XMMRegister, 8);
23236 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23237
23238 // Add 3,2,1,0 to get alternate byte index
23239 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23240 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23241 %}
23242 ins_pipe( pipe_slow );
23243 %}
23244
23245 instruct rearrangeI(vec dst, vec shuffle) %{
23246 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23247 UseAVX == 0);
23248 match(Set dst (VectorRearrange dst shuffle));
23249 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23250 ins_encode %{
23251 assert(UseSSE >= 4, "required");
23252 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23253 %}
23254 ins_pipe( pipe_slow );
23255 %}
23256
23257 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23258 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23259 UseAVX > 0);
23260 match(Set dst (VectorRearrange src shuffle));
23261 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23262 ins_encode %{
23263 int vlen_enc = vector_length_encoding(this);
23264 BasicType bt = Matcher::vector_element_basic_type(this);
23265 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23266 %}
23267 ins_pipe( pipe_slow );
23268 %}
23269
23270 // LoadShuffle/Rearrange for Long and Double
23271
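// For 64-bit elements without AVX512VL the permutation is done with vpermd on
// double words: each long index s becomes the dword pair (2s, 2s+1), e.g. the
// long shuffle [1, 0] becomes the dword mask [2,3, 0,1].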
23272 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23273 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23274 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23275 match(Set dst (VectorLoadShuffle src));
23276 effect(TEMP dst, TEMP vtmp);
23277 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23278 ins_encode %{
23279 assert(UseAVX >= 2, "required");
23280
23281 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms.
23284
23285 // Multiply each shuffle by two to get double word index
23286 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23287
23288 // Duplicate each double word shuffle
23289 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23290 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23291
23292 // Add one to get alternate double word index
23293 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23294 %}
23295 ins_pipe( pipe_slow );
23296 %}
23297
23298 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23299 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23300 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23301 match(Set dst (VectorRearrange src shuffle));
23302 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23303 ins_encode %{
23304 assert(UseAVX >= 2, "required");
23305
23306 int vlen_enc = vector_length_encoding(this);
23307 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23308 %}
23309 ins_pipe( pipe_slow );
23310 %}
23311
23312 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23313 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23314 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23315 match(Set dst (VectorRearrange src shuffle));
23316 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23317 ins_encode %{
23318 assert(UseAVX > 2, "required");
23319
23320 int vlen_enc = vector_length_encoding(this);
23321 if (vlen_enc == Assembler::AVX_128bit) {
23322 vlen_enc = Assembler::AVX_256bit;
23323 }
23324 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23325 %}
23326 ins_pipe( pipe_slow );
23327 %}
23328
23329 // --------------------------------- FMA --------------------------------------
23330 // a * b + c
23331
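// The FmaVF/FmaVD patterns keep the accumulator in $c and overwrite it, which
// matches the destructive accumulate form of the x86 FMA3 instructions; the
// multiply and add are fused, i.e. performed with a single rounding step.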
23332 instruct vfmaF_reg(vec a, vec b, vec c) %{
23333 match(Set c (FmaVF c (Binary a b)));
23334 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23335 ins_cost(150);
23336 ins_encode %{
23337 assert(UseFMA, "not enabled");
23338 int vlen_enc = vector_length_encoding(this);
23339 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23340 %}
23341 ins_pipe( pipe_slow );
23342 %}
23343
23344 instruct vfmaF_mem(vec a, memory b, vec c) %{
23345 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23346 match(Set c (FmaVF c (Binary a (LoadVector b))));
23347 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23348 ins_cost(150);
23349 ins_encode %{
23350 assert(UseFMA, "not enabled");
23351 int vlen_enc = vector_length_encoding(this);
23352 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23353 %}
23354 ins_pipe( pipe_slow );
23355 %}
23356
23357 instruct vfmaD_reg(vec a, vec b, vec c) %{
23358 match(Set c (FmaVD c (Binary a b)));
23359 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23360 ins_cost(150);
23361 ins_encode %{
23362 assert(UseFMA, "not enabled");
23363 int vlen_enc = vector_length_encoding(this);
23364 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23365 %}
23366 ins_pipe( pipe_slow );
23367 %}
23368
23369 instruct vfmaD_mem(vec a, memory b, vec c) %{
23370 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23371 match(Set c (FmaVD c (Binary a (LoadVector b))));
23372 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23373 ins_cost(150);
23374 ins_encode %{
23375 assert(UseFMA, "not enabled");
23376 int vlen_enc = vector_length_encoding(this);
23377 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23378 %}
23379 ins_pipe( pipe_slow );
23380 %}
23381
23382 // --------------------------------- Vector Multiply Add --------------------------------------
23383
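// MulAddVS2VI maps to pmaddwd, which multiplies adjacent short pairs and sums
// each pair into an int lane: dst_int[i] = s1[2i]*s2[2i] + s1[2i+1]*s2[2i+1].
// Illustrative example: [1,2,3,4] . [5,6,7,8] -> [1*5 + 2*6, 3*7 + 4*8] = [17, 53].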
23384 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23385 predicate(UseAVX == 0);
23386 match(Set dst (MulAddVS2VI dst src1));
23387 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23388 ins_encode %{
23389 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23390 %}
23391 ins_pipe( pipe_slow );
23392 %}
23393
23394 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23395 predicate(UseAVX > 0);
23396 match(Set dst (MulAddVS2VI src1 src2));
23397 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23398 ins_encode %{
23399 int vlen_enc = vector_length_encoding(this);
23400 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23401 %}
23402 ins_pipe( pipe_slow );
23403 %}
23404
23405 // --------------------------------- Vector Multiply Add Add ----------------------------------
23406
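// With AVX512_VNNI the multiply-add can be fused with the following
// accumulation: vpdpwssd computes dst += pmaddwd(src1, src2) in a single
// instruction, so the pattern below matches the AddVI on top of MulAddVS2VI
// and is made preferable via its low ins_cost.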
23407 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23408 predicate(VM_Version::supports_avx512_vnni());
23409 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23410 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23411 ins_encode %{
23412 assert(UseAVX > 2, "required");
23413 int vlen_enc = vector_length_encoding(this);
23414 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23415 %}
23416 ins_pipe( pipe_slow );
23417 ins_cost(10);
23418 %}
23419
23420 // --------------------------------- PopCount --------------------------------------
23421
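// When the target supports native vector popcount for the element type (the
// is_vector_popcount_predicate check), the EVEX forms are used directly; the
// AVX fallback below presumably computes the count with a pshufb-based nibble
// lookup, which is why it needs two XMM temporaries and a scratch GPR.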
23422 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23423 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23424 match(Set dst (PopCountVI src));
23425 match(Set dst (PopCountVL src));
23426 format %{ "vector_popcount_integral $dst, $src" %}
23427 ins_encode %{
23429 int vlen_enc = vector_length_encoding(this, $src);
23430 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23431 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23432 %}
23433 ins_pipe( pipe_slow );
23434 %}
23435
23436 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23437 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23438 match(Set dst (PopCountVI src mask));
23439 match(Set dst (PopCountVL src mask));
23440 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23441 ins_encode %{
23442 int vlen_enc = vector_length_encoding(this, $src);
23443 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23444 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23445 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23446 %}
23447 ins_pipe( pipe_slow );
23448 %}
23449
23450 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23451 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23452 match(Set dst (PopCountVI src));
23453 match(Set dst (PopCountVL src));
23454 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23455 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23456 ins_encode %{
23458 int vlen_enc = vector_length_encoding(this, $src);
23459 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23460 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23461 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23462 %}
23463 ins_pipe( pipe_slow );
23464 %}
23465
23466 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23467
23468 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23469 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23470 Matcher::vector_length_in_bytes(n->in(1))));
23471 match(Set dst (CountTrailingZerosV src));
23472 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23473 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23475 ins_encode %{
23476 int vlen_enc = vector_length_encoding(this, $src);
23477 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23478 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23479 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23480 %}
23481 ins_pipe( pipe_slow );
23482 %}
23483
23484 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23485 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23486 VM_Version::supports_avx512cd() &&
23487 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23488 match(Set dst (CountTrailingZerosV src));
23489 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23490 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23492 ins_encode %{
23493 int vlen_enc = vector_length_encoding(this, $src);
23494 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23495 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23496 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23497 %}
23498 ins_pipe( pipe_slow );
23499 %}
23500
23501 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23502 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23503 match(Set dst (CountTrailingZerosV src));
23504 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23505 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23507 ins_encode %{
23508 int vlen_enc = vector_length_encoding(this, $src);
23509 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23510 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23511 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23512 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23513 %}
23514 ins_pipe( pipe_slow );
23515 %}
23516
23517 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23518 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23519 match(Set dst (CountTrailingZerosV src));
23520 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23521 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23522 ins_encode %{
23523 int vlen_enc = vector_length_encoding(this, $src);
23524 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23525 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23526 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23527 %}
23528 ins_pipe( pipe_slow );
23529 %}
23530
23531
23532 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23533
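// The 8-bit $func immediate is a truth table: bit k of $func is the result for
// the input combination k = (dst_bit << 2) | (src2_bit << 1) | src3_bit.
// Examples: 0x96 computes dst ^ src2 ^ src3, and 0xE8 the bitwise majority of
// the three inputs.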
23534 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23535 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23536 effect(TEMP dst);
23537 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23538 ins_encode %{
23539 int vector_len = vector_length_encoding(this);
23540 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23541 %}
23542 ins_pipe( pipe_slow );
23543 %}
23544
23545 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23546 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23547 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23548 effect(TEMP dst);
23549 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23550 ins_encode %{
23551 int vector_len = vector_length_encoding(this);
23552 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23553 %}
23554 ins_pipe( pipe_slow );
23555 %}
23556
23557 // --------------------------------- Rotation Operations ----------------------------------
23558 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23559 match(Set dst (RotateLeftV src shift));
23560 match(Set dst (RotateRightV src shift));
23561 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23562 ins_encode %{
23563 int opcode = this->ideal_Opcode();
23564 int vector_len = vector_length_encoding(this);
23565 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23566 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23567 %}
23568 ins_pipe( pipe_slow );
23569 %}
23570
instruct vprotate_var(vec dst, vec src, vec shift) %{
23572 match(Set dst (RotateLeftV src shift));
23573 match(Set dst (RotateRightV src shift));
23574 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23575 ins_encode %{
23576 int opcode = this->ideal_Opcode();
23577 int vector_len = vector_length_encoding(this);
23578 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23579 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23580 %}
23581 ins_pipe( pipe_slow );
23582 %}
23583
23584 // ---------------------------------- Masked Operations ------------------------------------
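// Two flavours: AVX targets carry the mask in a vector register (vmaskmov-style,
// non-subword element types only), while EVEX targets use an opmask register.
// For masked loads the inactive lanes are zeroed rather than left undefined.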
23585 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23586 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23587 match(Set dst (LoadVectorMasked mem mask));
23588 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23589 ins_encode %{
23590 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23591 int vlen_enc = vector_length_encoding(this);
23592 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23593 %}
23594 ins_pipe( pipe_slow );
23595 %}
23596
23597
23598 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23599 predicate(n->in(3)->bottom_type()->isa_vectmask());
23600 match(Set dst (LoadVectorMasked mem mask));
23601 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23602 ins_encode %{
23603 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23604 int vector_len = vector_length_encoding(this);
23605 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23606 %}
23607 ins_pipe( pipe_slow );
23608 %}
23609
23610 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23611 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23612 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23613 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23614 ins_encode %{
23615 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23616 int vlen_enc = vector_length_encoding(src_node);
23617 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23618 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23619 %}
23620 ins_pipe( pipe_slow );
23621 %}
23622
23623 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23624 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23625 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23626 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23627 ins_encode %{
23628 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23629 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23630 int vlen_enc = vector_length_encoding(src_node);
23631 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23632 %}
23633 ins_pipe( pipe_slow );
23634 %}
23635
23636 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23637 match(Set addr (VerifyVectorAlignment addr mask));
23638 effect(KILL cr);
23639 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23640 ins_encode %{
23641 Label Lskip;
23642 // check if masked bits of addr are zero
23643 __ testq($addr$$Register, $mask$$constant);
23644 __ jccb(Assembler::equal, Lskip);
23645 __ stop("verify_vector_alignment found a misaligned vector memory access");
23646 __ bind(Lskip);
23647 %}
23648 ins_pipe(pipe_slow);
23649 %}
23650
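// VectorCmpMasked compares the active lanes of two vectors and is expected to
// be used with a prefix mask (as produced by VectorMaskGen): $dst is left at -1
// when every active lane matches, otherwise it receives the index of the first
// mismatch (tzcnt over the negated equality mask).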
23651 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23652 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23653 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23654 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23655 ins_encode %{
23656 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23657 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23658
23659 Label DONE;
23660 int vlen_enc = vector_length_encoding(this, $src1);
23661 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23662
23663 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23664 __ mov64($dst$$Register, -1L);
23665 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23666 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23667 __ jccb(Assembler::carrySet, DONE);
23668 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23669 __ notq($dst$$Register);
23670 __ tzcntq($dst$$Register, $dst$$Register);
23671 __ bind(DONE);
23672 %}
23673 ins_pipe( pipe_slow );
23674 %}
23675
23676
23677 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23678 match(Set dst (VectorMaskGen len));
23679 effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23681 ins_encode %{
23682 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23683 %}
23684 ins_pipe( pipe_slow );
23685 %}
23686
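// Immediate variant: the prefix mask is materialized as (~0UL >> (64 - len)).
// Illustrative example: len == 5 gives 0x1F, i.e. the five lowest mask bits set.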
23687 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23688 match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23690 effect(TEMP temp);
23691 ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23693 __ kmovql($dst$$KRegister, $temp$$Register);
23694 %}
23695 ins_pipe( pipe_slow );
23696 %}
23697
23698 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23699 predicate(n->in(1)->bottom_type()->isa_vectmask());
23700 match(Set dst (VectorMaskToLong mask));
23701 effect(TEMP dst, KILL cr);
23702 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23703 ins_encode %{
23704 int opcode = this->ideal_Opcode();
23705 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23706 int mask_len = Matcher::vector_length(this, $mask);
23707 int mask_size = mask_len * type2aelembytes(mbt);
23708 int vlen_enc = vector_length_encoding(this, $mask);
23709 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23710 $dst$$Register, mask_len, mask_size, vlen_enc);
23711 %}
23712 ins_pipe( pipe_slow );
23713 %}
23714
23715 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23716 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23717 match(Set dst (VectorMaskToLong mask));
23718 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23719 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23720 ins_encode %{
23721 int opcode = this->ideal_Opcode();
23722 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23723 int mask_len = Matcher::vector_length(this, $mask);
23724 int vlen_enc = vector_length_encoding(this, $mask);
23725 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23726 $dst$$Register, mask_len, mbt, vlen_enc);
23727 %}
23728 ins_pipe( pipe_slow );
23729 %}
23730
23731 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23732 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23733 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23734 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23735 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23736 ins_encode %{
23737 int opcode = this->ideal_Opcode();
23738 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23739 int mask_len = Matcher::vector_length(this, $mask);
23740 int vlen_enc = vector_length_encoding(this, $mask);
23741 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23742 $dst$$Register, mask_len, mbt, vlen_enc);
23743 %}
23744 ins_pipe( pipe_slow );
23745 %}
23746
23747 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23748 predicate(n->in(1)->bottom_type()->isa_vectmask());
23749 match(Set dst (VectorMaskTrueCount mask));
23750 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23751 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23752 ins_encode %{
23753 int opcode = this->ideal_Opcode();
23754 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23755 int mask_len = Matcher::vector_length(this, $mask);
23756 int mask_size = mask_len * type2aelembytes(mbt);
23757 int vlen_enc = vector_length_encoding(this, $mask);
23758 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23759 $tmp$$Register, mask_len, mask_size, vlen_enc);
23760 %}
23761 ins_pipe( pipe_slow );
23762 %}
23763
23764 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23765 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23766 match(Set dst (VectorMaskTrueCount mask));
23767 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23768 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23769 ins_encode %{
23770 int opcode = this->ideal_Opcode();
23771 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23772 int mask_len = Matcher::vector_length(this, $mask);
23773 int vlen_enc = vector_length_encoding(this, $mask);
23774 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23775 $tmp$$Register, mask_len, mbt, vlen_enc);
23776 %}
23777 ins_pipe( pipe_slow );
23778 %}
23779
23780 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23781 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23782 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23783 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23784 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23785 ins_encode %{
23786 int opcode = this->ideal_Opcode();
23787 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23788 int mask_len = Matcher::vector_length(this, $mask);
23789 int vlen_enc = vector_length_encoding(this, $mask);
23790 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23791 $tmp$$Register, mask_len, mbt, vlen_enc);
23792 %}
23793 ins_pipe( pipe_slow );
23794 %}
23795
23796 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23797 predicate(n->in(1)->bottom_type()->isa_vectmask());
23798 match(Set dst (VectorMaskFirstTrue mask));
23799 match(Set dst (VectorMaskLastTrue mask));
23800 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23801 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23802 ins_encode %{
23803 int opcode = this->ideal_Opcode();
23804 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23805 int mask_len = Matcher::vector_length(this, $mask);
23806 int mask_size = mask_len * type2aelembytes(mbt);
23807 int vlen_enc = vector_length_encoding(this, $mask);
23808 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23809 $tmp$$Register, mask_len, mask_size, vlen_enc);
23810 %}
23811 ins_pipe( pipe_slow );
23812 %}
23813
23814 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23815 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23816 match(Set dst (VectorMaskFirstTrue mask));
23817 match(Set dst (VectorMaskLastTrue mask));
23818 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23819 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23820 ins_encode %{
23821 int opcode = this->ideal_Opcode();
23822 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23823 int mask_len = Matcher::vector_length(this, $mask);
23824 int vlen_enc = vector_length_encoding(this, $mask);
23825 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23826 $tmp$$Register, mask_len, mbt, vlen_enc);
23827 %}
23828 ins_pipe( pipe_slow );
23829 %}
23830
23831 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23832 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23833 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23834 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23835 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23836 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23837 ins_encode %{
23838 int opcode = this->ideal_Opcode();
23839 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23840 int mask_len = Matcher::vector_length(this, $mask);
23841 int vlen_enc = vector_length_encoding(this, $mask);
23842 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23843 $tmp$$Register, mask_len, mbt, vlen_enc);
23844 %}
23845 ins_pipe( pipe_slow );
23846 %}
23847
23848 // --------------------------------- Compress/Expand Operations ---------------------------
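// CompressV gathers the lanes selected by the mask into the low-order lanes
// of the destination, zero-filling the rest; ExpandV is the inverse scatter.
// Informally, with src = [a, b, c, d] and mask = 0b0101 (lanes 0 and 2 set),
// compress produces [a, c, 0, 0] and expand produces [a, 0, b, 0].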
23849 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23850 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23851 match(Set dst (CompressV src mask));
23852 match(Set dst (ExpandV src mask));
23853 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
23854 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23855 ins_encode %{
23856 int opcode = this->ideal_Opcode();
23857 int vlen_enc = vector_length_encoding(this);
23858 BasicType bt = Matcher::vector_element_basic_type(this);
23859 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23860 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23861 %}
23862 ins_pipe( pipe_slow );
23863 %}
23864
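// With AVX512VL (or a full 512-bit vector) the hardware VPCOMPRESS/VPEXPAND
// forms handle compress and expand directly; the AVX2 rule above emulates
// them through a permutation computed into the declared temporaries.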
23865 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23866 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23867 match(Set dst (CompressV src mask));
23868 match(Set dst (ExpandV src mask));
23869 format %{ "vector_compress_expand $dst, $src, $mask" %}
23870 ins_encode %{
23871 int opcode = this->ideal_Opcode();
23872 int vlen_enc = vector_length_encoding(this);
23873 BasicType bt = Matcher::vector_element_basic_type(this);
23874 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vlen_enc);
23875 %}
23876 ins_pipe( pipe_slow );
23877 %}
23878
23879 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23880 match(Set dst (CompressM mask));
23881 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23882 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23883 ins_encode %{
23884 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
23885 int mask_len = Matcher::vector_length(this);
23886 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23887 %}
23888 ins_pipe( pipe_slow );
23889 %}
23890
23891 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
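// ReverseV reverses the bit order within each lane; ReverseBytesV reverses
// only the byte order (an endianness swap) within each lane.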
23892
23893 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23894 predicate(!VM_Version::supports_gfni());
23895 match(Set dst (ReverseV src));
23896 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23897 format %{ "vector_reverse_bit $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23898 ins_encode %{
23899 int vec_enc = vector_length_encoding(this);
23900 BasicType bt = Matcher::vector_element_basic_type(this);
23901 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23902 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23903 %}
23904 ins_pipe( pipe_slow );
23905 %}
23906
23907 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23908 predicate(VM_Version::supports_gfni());
23909 match(Set dst (ReverseV src));
23910 effect(TEMP dst, TEMP xtmp);
23911 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
23912 ins_encode %{
23913 int vec_enc = vector_length_encoding(this);
23914 BasicType bt = Matcher::vector_element_basic_type(this);
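// 0x8040201008040201 is the GF(2) affine matrix that mirrors the bit order
// of each byte when fed to GF2P8AFFINEQB.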
23915 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23916 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23917 $xtmp$$XMMRegister);
23918 %}
23919 ins_pipe( pipe_slow );
23920 %}
23921
23922 instruct vreverse_byte_reg(vec dst, vec src) %{
23923 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23924 match(Set dst (ReverseBytesV src));
23925 effect(TEMP dst);
23926 format %{ "vector_reverse_byte $dst, $src" %}
23927 ins_encode %{
23928 int vec_enc = vector_length_encoding(this);
23929 BasicType bt = Matcher::vector_element_basic_type(this);
23930 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23931 %}
23932 ins_pipe( pipe_slow );
23933 %}
23934
23935 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23936 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23937 match(Set dst (ReverseBytesV src));
23938 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23939 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23940 ins_encode %{
23941 int vec_enc = vector_length_encoding(this);
23942 BasicType bt = Matcher::vector_element_basic_type(this);
23943 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23944 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23945 %}
23946 ins_pipe( pipe_slow );
23947 %}
23948
23949 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
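// AVX512CD supplies native lane-wise leading-zero counts (VPLZCNTD/VPLZCNTQ)
// for int and long elements; the subword and pre-AVX512 cases below are
// emulated in the macro assembler using the listed temporaries.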
23950
23951 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23952 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23953 Matcher::vector_length_in_bytes(n->in(1))));
23954 match(Set dst (CountLeadingZerosV src));
23955 format %{ "vector_count_leading_zeros $dst, $src" %}
23956 ins_encode %{
23957 int vlen_enc = vector_length_encoding(this, $src);
23958 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23959 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23960 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23961 %}
23962 ins_pipe( pipe_slow );
23963 %}
23964
23965 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23966 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23967 Matcher::vector_length_in_bytes(n->in(1))));
23968 match(Set dst (CountLeadingZerosV src mask));
23969 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
23970 ins_encode %{
23971 int vlen_enc = vector_length_encoding(this, $src);
23972 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23973 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23974 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
23975 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
23976 %}
23977 ins_pipe( pipe_slow );
23978 %}
23979
23980 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
23981 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23982 VM_Version::supports_avx512cd() &&
23983 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23984 match(Set dst (CountLeadingZerosV src));
23985 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
23986 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
23987 ins_encode %{
23988 int vlen_enc = vector_length_encoding(this, $src);
23989 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23990 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23991 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
23992 %}
23993 ins_pipe( pipe_slow );
23994 %}
23995
23996 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
23997 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23998 match(Set dst (CountLeadingZerosV src));
23999 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24000 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24001 ins_encode %{
24002 int vlen_enc = vector_length_encoding(this, $src);
24003 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24004 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24005 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24006 $rtmp$$Register, true, vlen_enc);
24007 %}
24008 ins_pipe( pipe_slow );
24009 %}
24010
24011 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24012 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24013 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24014 match(Set dst (CountLeadingZerosV src));
24015 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24016 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24017 ins_encode %{
24018 int vlen_enc = vector_length_encoding(this, $src);
24019 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24020 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24021 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24022 %}
24023 ins_pipe( pipe_slow );
24024 %}
24025
24026 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24027 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24028 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24029 match(Set dst (CountLeadingZerosV src));
24030 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24031 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24032 ins_encode %{
24033 int vlen_enc = vector_length_encoding(this, $src);
24034 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24036 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24037 %}
24038 ins_pipe( pipe_slow );
24039 %}
24040
24041 // ---------------------------------- Vector Masked Operations ------------------------------------
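// The rules below use AVX-512 masking through evmasked_op: the 'true' merge
// flag writes only the lanes whose mask bit is set and preserves the
// remaining destination lanes. For example, an int add of dst = [1, 2, 3, 4]
// and src2 = [10, 20, 30, 40] under mask 0b0011 yields [11, 22, 3, 4].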
24042
24043 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24044 match(Set dst (AddVB (Binary dst src2) mask));
24045 match(Set dst (AddVS (Binary dst src2) mask));
24046 match(Set dst (AddVI (Binary dst src2) mask));
24047 match(Set dst (AddVL (Binary dst src2) mask));
24048 match(Set dst (AddVF (Binary dst src2) mask));
24049 match(Set dst (AddVD (Binary dst src2) mask));
24050 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24051 ins_encode %{
24052 int vlen_enc = vector_length_encoding(this);
24053 BasicType bt = Matcher::vector_element_basic_type(this);
24054 int opc = this->ideal_Opcode();
24055 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24056 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24057 %}
24058 ins_pipe( pipe_slow );
24059 %}
24060
24061 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24062 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24063 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24064 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24065 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24066 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24067 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24068 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24069 ins_encode %{
24070 int vlen_enc = vector_length_encoding(this);
24071 BasicType bt = Matcher::vector_element_basic_type(this);
24072 int opc = this->ideal_Opcode();
24073 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24074 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24075 %}
24076 ins_pipe( pipe_slow );
24077 %}
24078
24079 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24080 match(Set dst (XorV (Binary dst src2) mask));
24081 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24082 ins_encode %{
24083 int vlen_enc = vector_length_encoding(this);
24084 BasicType bt = Matcher::vector_element_basic_type(this);
24085 int opc = this->ideal_Opcode();
24086 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24087 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24088 %}
24089 ins_pipe( pipe_slow );
24090 %}
24091
24092 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24093 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24094 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24095 ins_encode %{
24096 int vlen_enc = vector_length_encoding(this);
24097 BasicType bt = Matcher::vector_element_basic_type(this);
24098 int opc = this->ideal_Opcode();
24099 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24100 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24101 %}
24102 ins_pipe( pipe_slow );
24103 %}
24104
24105 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24106 match(Set dst (OrV (Binary dst src2) mask));
24107 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24108 ins_encode %{
24109 int vlen_enc = vector_length_encoding(this);
24110 BasicType bt = Matcher::vector_element_basic_type(this);
24111 int opc = this->ideal_Opcode();
24112 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24113 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24114 %}
24115 ins_pipe( pipe_slow );
24116 %}
24117
24118 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24119 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24120 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24121 ins_encode %{
24122 int vlen_enc = vector_length_encoding(this);
24123 BasicType bt = Matcher::vector_element_basic_type(this);
24124 int opc = this->ideal_Opcode();
24125 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24126 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24127 %}
24128 ins_pipe( pipe_slow );
24129 %}
24130
24131 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24132 match(Set dst (AndV (Binary dst src2) mask));
24133 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24134 ins_encode %{
24135 int vlen_enc = vector_length_encoding(this);
24136 BasicType bt = Matcher::vector_element_basic_type(this);
24137 int opc = this->ideal_Opcode();
24138 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24139 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24140 %}
24141 ins_pipe( pipe_slow );
24142 %}
24143
24144 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24145 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24146 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24147 ins_encode %{
24148 int vlen_enc = vector_length_encoding(this);
24149 BasicType bt = Matcher::vector_element_basic_type(this);
24150 int opc = this->ideal_Opcode();
24151 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24152 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24153 %}
24154 ins_pipe( pipe_slow );
24155 %}
24156
24157 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24158 match(Set dst (SubVB (Binary dst src2) mask));
24159 match(Set dst (SubVS (Binary dst src2) mask));
24160 match(Set dst (SubVI (Binary dst src2) mask));
24161 match(Set dst (SubVL (Binary dst src2) mask));
24162 match(Set dst (SubVF (Binary dst src2) mask));
24163 match(Set dst (SubVD (Binary dst src2) mask));
24164 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24165 ins_encode %{
24166 int vlen_enc = vector_length_encoding(this);
24167 BasicType bt = Matcher::vector_element_basic_type(this);
24168 int opc = this->ideal_Opcode();
24169 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24170 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24171 %}
24172 ins_pipe( pipe_slow );
24173 %}
24174
24175 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24176 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24177 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24178 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24179 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24180 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24181 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24182 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24183 ins_encode %{
24184 int vlen_enc = vector_length_encoding(this);
24185 BasicType bt = Matcher::vector_element_basic_type(this);
24186 int opc = this->ideal_Opcode();
24187 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24188 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24189 %}
24190 ins_pipe( pipe_slow );
24191 %}
24192
24193 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24194 match(Set dst (MulVS (Binary dst src2) mask));
24195 match(Set dst (MulVI (Binary dst src2) mask));
24196 match(Set dst (MulVL (Binary dst src2) mask));
24197 match(Set dst (MulVF (Binary dst src2) mask));
24198 match(Set dst (MulVD (Binary dst src2) mask));
24199 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24200 ins_encode %{
24201 int vlen_enc = vector_length_encoding(this);
24202 BasicType bt = Matcher::vector_element_basic_type(this);
24203 int opc = this->ideal_Opcode();
24204 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24205 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24206 %}
24207 ins_pipe( pipe_slow );
24208 %}
24209
24210 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24211 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24212 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24213 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24214 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24215 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24216 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24217 ins_encode %{
24218 int vlen_enc = vector_length_encoding(this);
24219 BasicType bt = Matcher::vector_element_basic_type(this);
24220 int opc = this->ideal_Opcode();
24221 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24222 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24223 %}
24224 ins_pipe( pipe_slow );
24225 %}
24226
24227 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24228 match(Set dst (SqrtVF dst mask));
24229 match(Set dst (SqrtVD dst mask));
24230 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24231 ins_encode %{
24232 int vlen_enc = vector_length_encoding(this);
24233 BasicType bt = Matcher::vector_element_basic_type(this);
24234 int opc = this->ideal_Opcode();
24235 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24236 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24237 %}
24238 ins_pipe( pipe_slow );
24239 %}
24240
24241 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24242 match(Set dst (DivVF (Binary dst src2) mask));
24243 match(Set dst (DivVD (Binary dst src2) mask));
24244 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24245 ins_encode %{
24246 int vlen_enc = vector_length_encoding(this);
24247 BasicType bt = Matcher::vector_element_basic_type(this);
24248 int opc = this->ideal_Opcode();
24249 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24250 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24251 %}
24252 ins_pipe( pipe_slow );
24253 %}
24254
24255 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24256 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24257 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24258 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24259 ins_encode %{
24260 int vlen_enc = vector_length_encoding(this);
24261 BasicType bt = Matcher::vector_element_basic_type(this);
24262 int opc = this->ideal_Opcode();
24263 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24264 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24265 %}
24266 ins_pipe( pipe_slow );
24267 %}
24268
24270 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24271 match(Set dst (RotateLeftV (Binary dst shift) mask));
24272 match(Set dst (RotateRightV (Binary dst shift) mask));
24273 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24274 ins_encode %{
24275 int vlen_enc = vector_length_encoding(this);
24276 BasicType bt = Matcher::vector_element_basic_type(this);
24277 int opc = this->ideal_Opcode();
24278 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24279 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24280 %}
24281 ins_pipe( pipe_slow );
24282 %}
24283
24284 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24285 match(Set dst (RotateLeftV (Binary dst src2) mask));
24286 match(Set dst (RotateRightV (Binary dst src2) mask));
24287 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24288 ins_encode %{
24289 int vlen_enc = vector_length_encoding(this);
24290 BasicType bt = Matcher::vector_element_basic_type(this);
24291 int opc = this->ideal_Opcode();
24292 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24293 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24294 %}
24295 ins_pipe( pipe_slow );
24296 %}
24297
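// For the masked shifts, the trailing boolean passed to evmasked_op mirrors
// is_var_shift(): true selects the per-lane variable-shift forms (VPSLLV and
// friends), false the uniform-count forms.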
24298 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24299 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24300 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24301 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24302 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24303 ins_encode %{
24304 int vlen_enc = vector_length_encoding(this);
24305 BasicType bt = Matcher::vector_element_basic_type(this);
24306 int opc = this->ideal_Opcode();
24307 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24308 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24309 %}
24310 ins_pipe( pipe_slow );
24311 %}
24312
24313 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24314 predicate(!n->as_ShiftV()->is_var_shift());
24315 match(Set dst (LShiftVS (Binary dst src2) mask));
24316 match(Set dst (LShiftVI (Binary dst src2) mask));
24317 match(Set dst (LShiftVL (Binary dst src2) mask));
24318 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24319 ins_encode %{
24320 int vlen_enc = vector_length_encoding(this);
24321 BasicType bt = Matcher::vector_element_basic_type(this);
24322 int opc = this->ideal_Opcode();
24323 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24324 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24325 %}
24326 ins_pipe( pipe_slow );
24327 %}
24328
24329 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24330 predicate(n->as_ShiftV()->is_var_shift());
24331 match(Set dst (LShiftVS (Binary dst src2) mask));
24332 match(Set dst (LShiftVI (Binary dst src2) mask));
24333 match(Set dst (LShiftVL (Binary dst src2) mask));
24334 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24335 ins_encode %{
24336 int vlen_enc = vector_length_encoding(this);
24337 BasicType bt = Matcher::vector_element_basic_type(this);
24338 int opc = this->ideal_Opcode();
24339 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24340 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24341 %}
24342 ins_pipe( pipe_slow );
24343 %}
24344
24345 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24346 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24347 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24348 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24349 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24350 ins_encode %{
24351 int vlen_enc = vector_length_encoding(this);
24352 BasicType bt = Matcher::vector_element_basic_type(this);
24353 int opc = this->ideal_Opcode();
24354 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24355 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24356 %}
24357 ins_pipe( pipe_slow );
24358 %}
24359
24360 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24361 predicate(!n->as_ShiftV()->is_var_shift());
24362 match(Set dst (RShiftVS (Binary dst src2) mask));
24363 match(Set dst (RShiftVI (Binary dst src2) mask));
24364 match(Set dst (RShiftVL (Binary dst src2) mask));
24365 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24366 ins_encode %{
24367 int vlen_enc = vector_length_encoding(this);
24368 BasicType bt = Matcher::vector_element_basic_type(this);
24369 int opc = this->ideal_Opcode();
24370 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24371 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24372 %}
24373 ins_pipe( pipe_slow );
24374 %}
24375
24376 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24377 predicate(n->as_ShiftV()->is_var_shift());
24378 match(Set dst (RShiftVS (Binary dst src2) mask));
24379 match(Set dst (RShiftVI (Binary dst src2) mask));
24380 match(Set dst (RShiftVL (Binary dst src2) mask));
24381 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24382 ins_encode %{
24383 int vlen_enc = vector_length_encoding(this);
24384 BasicType bt = Matcher::vector_element_basic_type(this);
24385 int opc = this->ideal_Opcode();
24386 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24387 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24388 %}
24389 ins_pipe( pipe_slow );
24390 %}
24391
24392 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24393 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24394 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24395 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24396 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24397 ins_encode %{
24398 int vlen_enc = vector_length_encoding(this);
24399 BasicType bt = Matcher::vector_element_basic_type(this);
24400 int opc = this->ideal_Opcode();
24401 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24402 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24403 %}
24404 ins_pipe( pipe_slow );
24405 %}
24406
24407 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24408 predicate(!n->as_ShiftV()->is_var_shift());
24409 match(Set dst (URShiftVS (Binary dst src2) mask));
24410 match(Set dst (URShiftVI (Binary dst src2) mask));
24411 match(Set dst (URShiftVL (Binary dst src2) mask));
24412 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24413 ins_encode %{
24414 int vlen_enc = vector_length_encoding(this);
24415 BasicType bt = Matcher::vector_element_basic_type(this);
24416 int opc = this->ideal_Opcode();
24417 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24418 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24419 %}
24420 ins_pipe( pipe_slow );
24421 %}
24422
24423 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24424 predicate(n->as_ShiftV()->is_var_shift());
24425 match(Set dst (URShiftVS (Binary dst src2) mask));
24426 match(Set dst (URShiftVI (Binary dst src2) mask));
24427 match(Set dst (URShiftVL (Binary dst src2) mask));
24428 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24429 ins_encode %{
24430 int vlen_enc = vector_length_encoding(this);
24431 BasicType bt = Matcher::vector_element_basic_type(this);
24432 int opc = this->ideal_Opcode();
24433 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24434 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24435 %}
24436 ins_pipe( pipe_slow );
24437 %}
24438
24439 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24440 match(Set dst (MaxV (Binary dst src2) mask));
24441 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24442 ins_encode %{
24443 int vlen_enc = vector_length_encoding(this);
24444 BasicType bt = Matcher::vector_element_basic_type(this);
24445 int opc = this->ideal_Opcode();
24446 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24447 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24448 %}
24449 ins_pipe( pipe_slow );
24450 %}
24451
24452 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24453 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24454 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24455 ins_encode %{
24456 int vlen_enc = vector_length_encoding(this);
24457 BasicType bt = Matcher::vector_element_basic_type(this);
24458 int opc = this->ideal_Opcode();
24459 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24460 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24461 %}
24462 ins_pipe( pipe_slow );
24463 %}
24464
24465 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24466 match(Set dst (MinV (Binary dst src2) mask));
24467 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24468 ins_encode %{
24469 int vlen_enc = vector_length_encoding(this);
24470 BasicType bt = Matcher::vector_element_basic_type(this);
24471 int opc = this->ideal_Opcode();
24472 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24473 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24474 %}
24475 ins_pipe( pipe_slow );
24476 %}
24477
24478 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24479 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24480 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24481 ins_encode %{
24482 int vlen_enc = vector_length_encoding(this);
24483 BasicType bt = Matcher::vector_element_basic_type(this);
24484 int opc = this->ideal_Opcode();
24485 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24486 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24487 %}
24488 ins_pipe( pipe_slow );
24489 %}
24490
24491 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24492 match(Set dst (VectorRearrange (Binary dst src2) mask));
24493 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24494 ins_encode %{
24495 int vlen_enc = vector_length_encoding(this);
24496 BasicType bt = Matcher::vector_element_basic_type(this);
24497 int opc = this->ideal_Opcode();
24498 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24499 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24500 %}
24501 ins_pipe( pipe_slow );
24502 %}
24503
24504 instruct vabs_masked(vec dst, kReg mask) %{
24505 match(Set dst (AbsVB dst mask));
24506 match(Set dst (AbsVS dst mask));
24507 match(Set dst (AbsVI dst mask));
24508 match(Set dst (AbsVL dst mask));
24509 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24510 ins_encode %{
24511 int vlen_enc = vector_length_encoding(this);
24512 BasicType bt = Matcher::vector_element_basic_type(this);
24513 int opc = this->ideal_Opcode();
24514 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24515 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24516 %}
24517 ins_pipe( pipe_slow );
24518 %}
24519
24520 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24521 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24522 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24523 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24524 ins_encode %{
24525 assert(UseFMA, "Needs FMA instruction support.");
24526 int vlen_enc = vector_length_encoding(this);
24527 BasicType bt = Matcher::vector_element_basic_type(this);
24528 int opc = this->ideal_Opcode();
24529 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24530 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24531 %}
24532 ins_pipe( pipe_slow );
24533 %}
24534
24535 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24536 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24537 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24538 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24539 ins_encode %{
24540 assert(UseFMA, "Needs FMA instruction support.");
24541 int vlen_enc = vector_length_encoding(this);
24542 BasicType bt = Matcher::vector_element_basic_type(this);
24543 int opc = this->ideal_Opcode();
24544 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24545 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24546 %}
24547 ins_pipe( pipe_slow );
24548 %}
24549
24550 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24551 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24552 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24553 ins_encode %{
24554 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24555 int vlen_enc = vector_length_encoding(this, $src1);
24556 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24557
24558 // Compare lane-wise, dispatching on the element type of the first source
24559 // vector; the integral cases additionally honor signed vs. unsigned predicates.
24559 switch (src1_elem_bt) {
24560 case T_BYTE: {
24561 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24562 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24563 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24564 break;
24565 }
24566 case T_SHORT: {
24567 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24568 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24569 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24570 break;
24571 }
24572 case T_INT: {
24573 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24574 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24575 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24576 break;
24577 }
24578 case T_LONG: {
24579 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24580 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24581 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24582 break;
24583 }
24584 case T_FLOAT: {
24585 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24586 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24587 break;
24588 }
24589 case T_DOUBLE: {
24590 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24591 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24592 break;
24593 }
24594 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24595 }
24596 %}
24597 ins_pipe( pipe_slow );
24598 %}
24599
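// MaskAll replicates a scalar boolean (0 or -1) across every lane of a
// predicate register.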
24600 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24601 predicate(Matcher::vector_length(n) <= 32);
24602 match(Set dst (MaskAll src));
24603 format %{ "mask_all_evexI_LE32 $dst, $src" %}
24604 ins_encode %{
24605 int mask_len = Matcher::vector_length(this);
24606 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24607 %}
24608 ins_pipe( pipe_slow );
24609 %}
24610
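// Mask negation reaches the matcher as (XorVMask src (MaskAll -1)) and is
// emitted as KNOT; mask lengths below 8 need the extra scratch registers
// declared in the first rule.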
24611 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24612 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24613 match(Set dst (XorVMask src (MaskAll cnt)));
24614 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24615 format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24616 ins_encode %{
24617 uint masklen = Matcher::vector_length(this);
24618 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24619 %}
24620 ins_pipe( pipe_slow );
24621 %}
24622
24623 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24624 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24625 (Matcher::vector_length(n) == 16) ||
24626 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24627 match(Set dst (XorVMask src (MaskAll cnt)));
24628 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24629 ins_encode %{
24630 uint masklen = Matcher::vector_length(this);
24631 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24632 %}
24633 ins_pipe( pipe_slow );
24634 %}
24635
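// VectorLongToMask is the inverse of VectorMaskToLong: bit i of the source
// long becomes lane i of the mask, materialized either as a boolean vector
// (AVX) or as a k-register (EVEX).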
24636 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24637 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24638 match(Set dst (VectorLongToMask src));
24639 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24640 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24641 ins_encode %{
24642 int mask_len = Matcher::vector_length(this);
24643 int vec_enc = vector_length_encoding(mask_len);
24644 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24645 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24646 %}
24647 ins_pipe( pipe_slow );
24648 %}
24649
24651 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24652 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24653 match(Set dst (VectorLongToMask src));
24654 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24655 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24656 ins_encode %{
24657 int mask_len = Matcher::vector_length(this);
24658 assert(mask_len <= 32, "invalid mask length");
24659 int vec_enc = vector_length_encoding(mask_len);
24660 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24661 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24662 %}
24663 ins_pipe( pipe_slow );
24664 %}
24665
24666 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24667 predicate(n->bottom_type()->isa_vectmask());
24668 match(Set dst (VectorLongToMask src));
24669 format %{ "long_to_mask_evex $dst, $src" %}
24670 ins_encode %{
24671 __ kmov($dst$$KRegister, $src$$Register);
24672 %}
24673 ins_pipe( pipe_slow );
24674 %}
24675
24676 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24677 match(Set dst (AndVMask src1 src2));
24678 match(Set dst (OrVMask src1 src2));
24679 match(Set dst (XorVMask src1 src2));
24680 effect(TEMP kscratch);
24681 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24682 ins_encode %{
24683 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24684 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24685 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24686 uint masklen = Matcher::vector_length(this);
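// Byte-granular mask instructions (KANDB and friends) require AVX512DQ, so
// sub-16-bit operations are widened to the 16-bit forms when DQ is absent.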
24687 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24688 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24689 %}
24690 ins_pipe( pipe_slow );
24691 %}
24692
24693 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24694 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24695 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24696 ins_encode %{
24697 int vlen_enc = vector_length_encoding(this);
24698 BasicType bt = Matcher::vector_element_basic_type(this);
24699 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24700 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24701 %}
24702 ins_pipe( pipe_slow );
24703 %}
24704
24705 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24706 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24707 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24708 ins_encode %{
24709 int vlen_enc = vector_length_encoding(this);
24710 BasicType bt = Matcher::vector_element_basic_type(this);
24711 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24712 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24713 %}
24714 ins_pipe( pipe_slow );
24715 %}
24716
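// The three CastVV flavours below are compile-time type assertions only and
// emit no code.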
24717 instruct castMM(kReg dst)
24718 %{
24719 match(Set dst (CastVV dst));
24720
24721 size(0);
24722 format %{ "# castVV of $dst" %}
24723 ins_encode(/* empty encoding */);
24724 ins_cost(0);
24725 ins_pipe(empty);
24726 %}
24727
24728 instruct castVV(vec dst)
24729 %{
24730 match(Set dst (CastVV dst));
24731
24732 size(0);
24733 format %{ "# castVV of $dst" %}
24734 ins_encode(/* empty encoding */);
24735 ins_cost(0);
24736 ins_pipe(empty);
24737 %}
24738
24739 instruct castVVLeg(legVec dst)
24740 %{
24741 match(Set dst (CastVV dst));
24742
24743 size(0);
24744 format %{ "# castVV of $dst" %}
24745 ins_encode(/* empty encoding */);
24746 ins_cost(0);
24747 ins_pipe(empty);
24748 %}
24749
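// VFPCLASS immediate 0x18 tests the +infinity (bit 3) and -infinity (bit 4)
// classes, so the resulting mask bit is set exactly for infinite inputs.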
24750 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24751 %{
24752 match(Set dst (IsInfiniteF src));
24753 effect(TEMP ktmp, KILL cr);
24754 format %{ "float_class_check $dst, $src" %}
24755 ins_encode %{
24756 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24757 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24758 %}
24759 ins_pipe(pipe_slow);
24760 %}
24761
24762 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24763 %{
24764 match(Set dst (IsInfiniteD src));
24765 effect(TEMP ktmp, KILL cr);
24766 format %{ "double_class_check $dst, $src" %}
24767 ins_encode %{
24768 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24769 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24770 %}
24771 ins_pipe(pipe_slow);
24772 %}
24773
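// Saturating add/sub: byte and short lanes map directly onto the hardware
// PADDS/PADDUS/PSUBS/PSUBUS families; int and long lanes have no saturating
// hardware forms and are emulated below with explicit overflow checks.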
24774 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24775 %{
24776 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24777 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24778 match(Set dst (SaturatingAddV src1 src2));
24779 match(Set dst (SaturatingSubV src1 src2));
24780 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24781 ins_encode %{
24782 int vlen_enc = vector_length_encoding(this);
24783 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24784 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24785 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24786 %}
24787 ins_pipe(pipe_slow);
24788 %}
24789
24790 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24791 %{
24792 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24793 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24794 match(Set dst (SaturatingAddV src1 src2));
24795 match(Set dst (SaturatingSubV src1 src2));
24796 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24797 ins_encode %{
24798 int vlen_enc = vector_length_encoding(this);
24799 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24800 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24801 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24802 %}
24803 ins_pipe(pipe_slow);
24804 %}
24805
24806 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24807 %{
24808 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24809 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24810 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24811 match(Set dst (SaturatingAddV src1 src2));
24812 match(Set dst (SaturatingSubV src1 src2));
24813 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24814 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24815 ins_encode %{
24816 int vlen_enc = vector_length_encoding(this);
24817 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24818 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24819 $src1$$XMMRegister, $src2$$XMMRegister,
24820 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24821 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24822 %}
24823 ins_pipe(pipe_slow);
24824 %}
24825
24826 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24827 %{
24828 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24829 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24830 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24831 match(Set dst (SaturatingAddV src1 src2));
24832 match(Set dst (SaturatingSubV src1 src2));
24833 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24834 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24835 ins_encode %{
24836 int vlen_enc = vector_length_encoding(this);
24837 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24838 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24839 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24840 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24841 %}
24842 ins_pipe(pipe_slow);
24843 %}
24844
24845 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24846 %{
24847 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24848 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24849 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24850 match(Set dst (SaturatingAddV src1 src2));
24851 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24852 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24853 ins_encode %{
24854 int vlen_enc = vector_length_encoding(this);
24855 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24856 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24857 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24858 %}
24859 ins_pipe(pipe_slow);
24860 %}
24861
24862 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24863 %{
24864 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24865 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24866 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24867 match(Set dst (SaturatingAddV src1 src2));
24868 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24869 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24870 ins_encode %{
24871 int vlen_enc = vector_length_encoding(this);
24872 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24873 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24874 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24875 %}
24876 ins_pipe(pipe_slow);
24877 %}
24878
24879 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24880 %{
24881 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24882 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24883 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24884 match(Set dst (SaturatingSubV src1 src2));
24885 effect(TEMP ktmp);
24886 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24887 ins_encode %{
24888 int vlen_enc = vector_length_encoding(this);
24889 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24890 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24891 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24892 %}
24893 ins_pipe(pipe_slow);
24894 %}
24895
24896 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24897 %{
24898 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24899 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24900 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24901 match(Set dst (SaturatingSubV src1 src2));
24902 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24903 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24904 ins_encode %{
24905 int vlen_enc = vector_length_encoding(this);
24906 BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that when invoked returns a new
// // node as defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true if
// // the transformation succeeded, else false
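// //
// // As a minimal sketch only (the names my_peep_procedure, rule0, and rule1
// // are assumptions for illustration, not the actual HotSpot code), such a
// // procedure has roughly the following shape:
// //
// //   bool my_peep_procedure(Block* block, int index, PhaseRegAlloc* ra_,
// //                          MachNode* (*new_root)(), int rule0, int rule1) {
// //     // inspect block->get_node(index) and its predecessors, verify the
// //     // intended pattern, then materialize the replacement via new_root()
// //     // and splice it into the block
// //     return false; // return true only when the transformation is applied
// //   }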
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//   [, ...] );
// // instruction numbers are zero-based, in left-to-right order within peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole?)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexibility in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the nodes matched here are passed into the procedure below
//   peepmatch ( incI_rReg movI );
//   // the procedure responsible for performing the transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions are not matched by the matcher but are used by the peephole rules
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
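    // rbp and r13 cannot be used as the base register of an address without
    // an explicit displacement byte (their displacement-free base encodings
    // are taken by other addressing forms), so prefer them as the index.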
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
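    // For a shift by 1 (times_2), [src + src] is preferred over an index-only
    // address, which always needs a 32-bit displacement; rbp and r13 are
    // excluded since they cannot serve as a displacement-free base.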
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
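    // Same base-register encoding consideration as in leaI_rReg_rReg_peep.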
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
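    // Same scale-encoding consideration as in leaI_rReg_immI2_peep.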
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
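//
// For example, provided the condition flags killed by the arithmetic
// instruction are not consumed downstream (lea does not set flags), the pair
//   movl rdx, rax
//   addl rdx, rbx
// can be coalesced into the single instruction
//   leal rdx, [rax + rbx]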

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}

// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (such as JCC or CMOV)
// only use flags that are already set by the previous instruction.
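//
// For example, in the sequence
//   andl  rax, rbx
//   testl rax, rax
//   je    L
// the testl is redundant: andl already sets ZF and SF according to its
// result, so the branch can consume the flags produced by andl directly.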

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.