//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding, VMReg );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
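//
// For example, the first definition below,
//   reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the Java and C conventions,
// spilled via LoadI/StoreI, with hardware encoding 0, and backed by the
// VMReg returned by rax->as_VMReg(); the matching RAX_H entry names the
// upper half of the 64-bit register pair.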

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
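// (Without a REX prefix, byte-register encodings 4-7 select AH, CH, DH and
// BH; with a REX prefix present they select SPL, BPL, SIL and DIL instead.)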

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned on as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prologue/epilogue code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

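// The ordering of chunk0 below follows this heuristic: freely usable
// scratch registers such as R10 and R11 come first, while RSP, which
// participates in every call and frame sequence, comes last.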
alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics, array copy stubs
// and superword operations (see UseSSE42Intrinsics, UseXMMForArrayCopy
// and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
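// (All XMM registers below are nonetheless defined SOC for both
// conventions; preserving XMM6-XMM15 around native calls on Windows is
// presumably handled by the native call wrappers/stubs rather than by the
// register allocator.)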

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
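// (K0 is omitted: in EVEX encodings a mask-register field of 0 means "no
// masking", so K0 cannot be used as a general write mask.)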
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

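// The classes below are defined by C++ fragments that return register masks
// computed at VM startup (typically in reg_mask_init()), so the usable
// register set can depend on runtime flags such as UseAPX.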
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15 is reserved as the thread register
// in compiled code, which is why it is absent from the int classes above)
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512-bit vector registers (XMM0-XMM15 only)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
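
// Note: reg_class_dynamic selects between its two operand classes at
// runtime -- the first (EVEX) class when the trailing predicate holds,
// otherwise the second (legacy) class. For example, vectorz_reg above
// resolves to vectorz_reg_evex on an AVX-512 host and to
// vectorz_reg_legacy otherwise.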
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
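
// Illustrative example: a CastLL typed [0, 4096] has both bounds
// representable as simm32, so this returns true; a bound such as
// (1L << 40) forces the 64-bit comparison path instead. min_jlong and
// max_jlong are treated as "unbounded" and do not disqualify the node.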
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
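
// Derivation example: running with -XX:+UseCompressedOops and
// -XX:+PreserveFramePointer removes both r12 (the heap base) and rbp
// from _ANY_REG_mask, so _PTR_REG_mask and every mask derived from it
// inherit those holes; _INT_REG_mask starts from _ALL_INT_REG_mask and
// repeats the same removals explicitly above.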
1625
static bool generate_vzeroupper(Compile* C) {
  // Emit vzeroupper when the compiled code uses wide vectors or
  // explicitly requests that the upper AVX state be cleared.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}

static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
}
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
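
// Byte-count sketch for the offsets above, assuming no vzeroupper is
// emitted: a direct call is E8 + rel32 = 5 bytes; the dynamic-call
// sequence prepends a 10-byte movq of the inline-cache data, giving 15;
// a runtime call is movq r10, imm64 (10 bytes) plus call r10 (3 bytes),
// giving 13. clear_avx_size() contributes 3 more bytes whenever a
// vzeroupper precedes the call.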
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
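
// Worked example: if this node starts at offset 17 and a vzeroupper is
// emitted (3 bytes), the rel32 displacement begins at 17 + 3 + 1 = 21,
// so align_up(21, 4) - 21 = 3 bytes of padding are requested to keep
// the patchable displacement within one aligned 4-byte unit.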
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
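
// Worked example: an unordered ucomiss/ucomisd result sets
// ZF = PF = CF = 1 (low flag byte 0x45; OF/SF/AF are already zero).
// ANDing with 0x2B clears ZF and PF while preserving CF, so the NaN
// case reads back as 'below' (less than), which is what consumers of
// this fixup expect.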
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
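
// Net effect: dst = -1 for unordered or below, 0 for equal, +1 for
// above -- the usual three-way compare result. setcc(notEqual) only
// runs when neither the parity nor the below branch was taken, so ZF
// alone then decides between 0 and 1.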
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
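
// Note on the zero case above: blending the sign bits is what makes
// signed zeros come out right, since ucomis* compares +0.0 and -0.0 as
// equal. For min, vpor yields -0.0 if either operand is -0.0; for max,
// vpand yields +0.0 unless both operands are -0.0, matching
// Math.min/Math.max semantics.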
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
1906 // NOTE: We set the table base offset here because users might be
1907 // emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove words for the return address (already pushed) and RBP.
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove words for the return address (already pushed) and RBP.
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
2014 enum RC {
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
2072 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2073 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2074 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2075 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
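      // gpr -> kreg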
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
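      // kreg -> gpr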
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill combination");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
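
// Byte-count sketch for the sizes above: lea reg, [rsp + disp] encodes
// as prefix + opcode + ModRM + SIB + disp, where disp is 1 byte when
// the offset fits in a signed byte and 4 bytes otherwise; register
// encodings above 15 (APX extended GPRs) need the 2-byte REX2 prefix
// instead of REX, hence the extra byte in each case.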
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
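
// Example: for Op_VecY this returns the pair (XMM0h_num, XMM0_num),
// i.e. the 256-bit result occupies slots XMM0..XMM0h of the first
// vector register; wider ideal registers only move the hi bound.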
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
2649 // this method should return false for offset 0.
2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
2654 offset -= br_size;
2655
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
2658 if (rule == jmpConUCF2_rule)
2659 return (-126 <= offset && offset <= 125);
2660 return (-128 <= offset && offset <= 127);
2661 }
2662
2663 // Return whether or not this register is ever used as an argument.
2664 // This function is used on startup to build the trampoline stubs in
// generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2668 bool Matcher::can_be_java_arg(int reg)
2669 {
2670 return
2671 reg == RDI_num || reg == RDI_H_num ||
2672 reg == RSI_num || reg == RSI_H_num ||
2673 reg == RDX_num || reg == RDX_H_num ||
2674 reg == RCX_num || reg == RCX_H_num ||
2675 reg == R8_num || reg == R8_H_num ||
2676 reg == R9_num || reg == R9_H_num ||
2677 reg == R12_num || reg == R12_H_num ||
2678 reg == XMM0_num || reg == XMM0b_num ||
2679 reg == XMM1_num || reg == XMM1b_num ||
2680 reg == XMM2_num || reg == XMM2b_num ||
2681 reg == XMM3_num || reg == XMM3b_num ||
2682 reg == XMM4_num || reg == XMM4b_num ||
2683 reg == XMM5_num || reg == XMM5b_num ||
2684 reg == XMM6_num || reg == XMM6b_num ||
2685 reg == XMM7_num || reg == XMM7b_num;
2686 }
2687
2688 bool Matcher::is_spillable_arg(int reg)
2689 {
2690 return can_be_java_arg(reg);
2691 }
2692
2693 uint Matcher::int_pressure_limit()
2694 {
2695 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2696 }
2697
2698 uint Matcher::float_pressure_limit()
2699 {
// After experimenting with different values, the following default threshold
// works best for LCM's register pressure scheduling on x64.
2702 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2703 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2704 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2705 }
2706
2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
// In 64-bit mode, code that uses multiplication when the
// divisor is constant is faster than the hardware
// DIV instruction (it uses MulHiL).
2711 return false;
2712 }
2713
2714 // Register for DIVI projection of divmodI
2715 const RegMask& Matcher::divI_proj_mask() {
2716 return INT_RAX_REG_mask();
2717 }
2718
2719 // Register for MODI projection of divmodI
2720 const RegMask& Matcher::modI_proj_mask() {
2721 return INT_RDX_REG_mask();
2722 }
2723
2724 // Register for DIVL projection of divmodL
2725 const RegMask& Matcher::divL_proj_mask() {
2726 return LONG_RAX_REG_mask();
2727 }
2728
2729 // Register for MODL projection of divmodL
2730 const RegMask& Matcher::modL_proj_mask() {
2731 return LONG_RDX_REG_mask();
2732 }
2733
2734 %}
2735
2736 source_hpp %{
2737 // Header information of the source block.
2738 // Method declarations/definitions which are used outside
2739 // the ad-scope can conveniently be defined here.
2740 //
2741 // To keep related declarations/definitions/uses close together,
// we switch between source %{ ... %} and source_hpp %{ ... %} blocks freely as needed.
2743
2744 #include "runtime/vm_version.hpp"
2745
2746 class NativeJump;
2747
2748 class CallStubImpl {
2749
2750 //--------------------------------------------------------------
2751 //---< Used for optimization in Compile::shorten_branches >---
2752 //--------------------------------------------------------------
2753
2754 public:
2755 // Size of call trampoline stub.
2756 static uint size_call_trampoline() {
2757 return 0; // no call trampolines on this platform
2758 }
2759
2760 // number of relocations needed by a call trampoline stub
2761 static uint reloc_call_trampoline() {
2762 return 0; // no call trampolines on this platform
2763 }
2764 };
2765
2766 class HandlerImpl {
2767
2768 public:
2769
2770 static int emit_deopt_handler(C2_MacroAssembler* masm);
2771
2772 static uint size_deopt_handler() {
// one 5-byte call (opcode + rel32) and one 2-byte short jmp (opcode + rel8).
2774 return 7;
2775 }
2776 };
2777
2778 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2779 switch(bytes) {
2780 case 4: // fall-through
2781 case 8: // fall-through
2782 case 16: return Assembler::AVX_128bit;
2783 case 32: return Assembler::AVX_256bit;
2784 case 64: return Assembler::AVX_512bit;
2785
2786 default: {
2787 ShouldNotReachHere();
2788 return Assembler::AVX_NoVec;
2789 }
2790 }
2791 }
2792
2793 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2794 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2795 }
2796
2797 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2798 uint def_idx = use->operand_index(opnd);
2799 Node* def = use->in(def_idx);
2800 return vector_length_encoding(def);
2801 }
2802
2803 static inline bool is_vector_popcount_predicate(BasicType bt) {
2804 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2805 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2806 }
2807
2808 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2809 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2810 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2811 }
2812
2813 class Node::PD {
2814 public:
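// x86-specific node flags layered on top of the shared Node flag space:
// a tag for branches affected by the Intel JCC erratum (consumed by
// compute_padding below) and records of which condition-code bits an
// instruction sets or clears.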
2815 enum NodeFlags {
2816 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2817 Flag_sets_carry_flag = Node::_last_flag << 2,
2818 Flag_sets_parity_flag = Node::_last_flag << 3,
2819 Flag_sets_zero_flag = Node::_last_flag << 4,
2820 Flag_sets_overflow_flag = Node::_last_flag << 5,
2821 Flag_sets_sign_flag = Node::_last_flag << 6,
2822 Flag_clears_carry_flag = Node::_last_flag << 7,
2823 Flag_clears_parity_flag = Node::_last_flag << 8,
2824 Flag_clears_zero_flag = Node::_last_flag << 9,
2825 Flag_clears_overflow_flag = Node::_last_flag << 10,
2826 Flag_clears_sign_flag = Node::_last_flag << 11,
2827 _last_flag = Flag_clears_sign_flag
2828 };
2829 };
2830
2831 %} // end source_hpp
2832
2833 source %{
2834
2835 #include "opto/addnode.hpp"
2836 #include "c2_intelJccErratum_x86.hpp"
2837
2838 void PhaseOutput::pd_perform_mach_node_analysis() {
2839 if (VM_Version::has_intel_jcc_erratum()) {
2840 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2841 _buf_sizes._code += extra_padding;
2842 }
2843 }
2844
2845 int MachNode::pd_alignment_required() const {
2846 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2847 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2848 return IntelJccErratum::largest_jcc_size() + 1;
2849 } else {
2850 return 1;
2851 }
2852 }
2853
2854 int MachNode::compute_padding(int current_offset) const {
2855 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2856 Compile* C = Compile::current();
2857 PhaseOutput* output = C->output();
2858 Block* block = output->block();
2859 int index = output->index();
2860 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2861 } else {
2862 return 0;
2863 }
2864 }
2865
2866 // Emit deopt handler code.
2867 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2868
2869 // Note that the code buffer's insts_mark is always relative to insts.
2870 // That's why we must use the macroassembler to generate a handler.
2871 address base = __ start_a_stub(size_deopt_handler());
2872 if (base == nullptr) {
2873 ciEnv::current()->record_failure("CodeCache is full");
2874 return 0; // CodeBuffer::expand failed
2875 }
2876 int offset = __ offset();
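
// Layout sketch: the entry point returned is the jmp below. Entering there
// branches back to the call, which transfers to the deopt blob with a return
// address still inside this stub; the asserts below check the stub size and
// that enough bytes follow the entry for the post-call NOP check.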
2877
2878 Label start;
2879 __ bind(start);
2880
2881 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2882
2883 int entry_offset = __ offset();
2884
2885 __ jmp(start);
2886
2887 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2888 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2889 "out of bounds read in post-call NOP check");
2890 __ end_a_stub();
2891 return entry_offset;
2892 }
2893
2894 static Assembler::Width widthForType(BasicType bt) {
2895 if (bt == T_BYTE) {
2896 return Assembler::B;
2897 } else if (bt == T_SHORT) {
2898 return Assembler::W;
2899 } else if (bt == T_INT) {
2900 return Assembler::D;
2901 } else {
2902 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2903 return Assembler::Q;
2904 }
2905 }
2906
2907 //=============================================================================
2908
2909 // Float masks come from different places depending on platform.
2910 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2911 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2912 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2913 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2914 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2915 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2916 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2917 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2918 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2919 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2920 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2921 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2922 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2923 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2924 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2925 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2926 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2927 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2928 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2929
2930 //=============================================================================
2931 bool Matcher::match_rule_supported(int opcode) {
2932 if (!has_match_rule(opcode)) {
2933 return false; // no match rule present
2934 }
2935 switch (opcode) {
2936 case Op_AbsVL:
2937 case Op_StoreVectorScatter:
2938 if (UseAVX < 3) {
2939 return false;
2940 }
2941 break;
2942 case Op_PopCountI:
2943 case Op_PopCountL:
2944 if (!UsePopCountInstruction) {
2945 return false;
2946 }
2947 break;
2948 case Op_PopCountVI:
2949 if (UseAVX < 2) {
2950 return false;
2951 }
2952 break;
2953 case Op_CompressV:
2954 case Op_ExpandV:
2955 case Op_PopCountVL:
2956 if (UseAVX < 2) {
2957 return false;
2958 }
2959 break;
2960 case Op_MulVI:
2961 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
2962 return false;
2963 }
2964 break;
2965 case Op_MulVL:
if (UseSSE < 4) { // requires at least SSE4.1
2967 return false;
2968 }
2969 break;
2970 case Op_MulReductionVL:
if (!VM_Version::supports_avx512dq()) {
2972 return false;
2973 }
2974 break;
2975 case Op_AbsVB:
2976 case Op_AbsVS:
2977 case Op_AbsVI:
2978 case Op_AddReductionVI:
2979 case Op_AndReductionV:
2980 case Op_OrReductionV:
2981 case Op_XorReductionV:
2982 if (UseSSE < 3) { // requires at least SSSE3
2983 return false;
2984 }
2985 break;
2986 case Op_MaxHF:
2987 case Op_MinHF:
2988 if (!VM_Version::supports_avx512vlbw()) {
2989 return false;
2990 } // fallthrough
2991 case Op_AddHF:
2992 case Op_DivHF:
2993 case Op_FmaHF:
2994 case Op_MulHF:
2995 case Op_ReinterpretS2HF:
2996 case Op_ReinterpretHF2S:
2997 case Op_SubHF:
2998 case Op_SqrtHF:
2999 if (!VM_Version::supports_avx512_fp16()) {
3000 return false;
3001 }
3002 break;
3003 case Op_VectorLoadShuffle:
3004 case Op_VectorRearrange:
3005 case Op_MulReductionVI:
3006 if (UseSSE < 4) { // requires at least SSE4
3007 return false;
3008 }
3009 break;
3010 case Op_IsInfiniteF:
3011 case Op_IsInfiniteD:
3012 if (!VM_Version::supports_avx512dq()) {
3013 return false;
3014 }
3015 break;
3016 case Op_SqrtVD:
3017 case Op_SqrtVF:
3018 case Op_VectorMaskCmp:
3019 case Op_VectorCastB2X:
3020 case Op_VectorCastS2X:
3021 case Op_VectorCastI2X:
3022 case Op_VectorCastL2X:
3023 case Op_VectorCastF2X:
3024 case Op_VectorCastD2X:
3025 case Op_VectorUCastB2X:
3026 case Op_VectorUCastS2X:
3027 case Op_VectorUCastI2X:
3028 case Op_VectorMaskCast:
3029 if (UseAVX < 1) { // enabled for AVX only
3030 return false;
3031 }
3032 break;
3033 case Op_PopulateIndex:
3034 if (UseAVX < 2) {
3035 return false;
3036 }
3037 break;
3038 case Op_RoundVF:
3039 if (UseAVX < 2) { // enabled for AVX2 only
3040 return false;
3041 }
3042 break;
3043 case Op_RoundVD:
3044 if (UseAVX < 3) {
3045 return false; // enabled for AVX3 only
3046 }
3047 break;
3048 case Op_CompareAndSwapL:
3049 case Op_CompareAndSwapP:
3050 break;
3051 case Op_StrIndexOf:
3052 if (!UseSSE42Intrinsics) {
3053 return false;
3054 }
3055 break;
3056 case Op_StrIndexOfChar:
3057 if (!UseSSE42Intrinsics) {
3058 return false;
3059 }
3060 break;
3061 case Op_OnSpinWait:
if (!VM_Version::supports_on_spin_wait()) {
3063 return false;
3064 }
3065 break;
3066 case Op_MulVB:
3067 case Op_LShiftVB:
3068 case Op_RShiftVB:
3069 case Op_URShiftVB:
3070 case Op_VectorInsert:
3071 case Op_VectorLoadMask:
3072 case Op_VectorStoreMask:
3073 case Op_VectorBlend:
3074 if (UseSSE < 4) {
3075 return false;
3076 }
3077 break;
3078 case Op_MaxD:
3079 case Op_MaxF:
3080 case Op_MinD:
3081 case Op_MinF:
3082 if (UseAVX < 1) { // enabled for AVX only
3083 return false;
3084 }
3085 break;
3086 case Op_CacheWB:
3087 case Op_CacheWBPreSync:
3088 case Op_CacheWBPostSync:
3089 if (!VM_Version::supports_data_cache_line_flush()) {
3090 return false;
3091 }
3092 break;
3093 case Op_ExtractB:
3094 case Op_ExtractL:
3095 case Op_ExtractI:
3096 case Op_RoundDoubleMode:
3097 if (UseSSE < 4) {
3098 return false;
3099 }
3100 break;
3101 case Op_RoundDoubleModeV:
if (!VM_Version::supports_avx()) {
3103 return false; // 128bit vroundpd is not available
3104 }
3105 break;
3106 case Op_LoadVectorGather:
3107 case Op_LoadVectorGatherMasked:
3108 if (UseAVX < 2) {
3109 return false;
3110 }
3111 break;
3112 case Op_FmaF:
3113 case Op_FmaD:
3114 case Op_FmaVD:
3115 case Op_FmaVF:
3116 if (!UseFMA) {
3117 return false;
3118 }
3119 break;
3120 case Op_MacroLogicV:
3121 if (UseAVX < 3 || !UseVectorMacroLogic) {
3122 return false;
3123 }
3124 break;
3125
3126 case Op_VectorCmpMasked:
3127 case Op_VectorMaskGen:
3128 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3129 return false;
3130 }
3131 break;
3132 case Op_VectorMaskFirstTrue:
3133 case Op_VectorMaskLastTrue:
3134 case Op_VectorMaskTrueCount:
3135 case Op_VectorMaskToLong:
3136 if (UseAVX < 1) {
3137 return false;
3138 }
3139 break;
3140 case Op_RoundF:
3141 case Op_RoundD:
3142 break;
3143 case Op_CopySignD:
3144 case Op_CopySignF:
3145 if (UseAVX < 3) {
3146 return false;
3147 }
3148 if (!VM_Version::supports_avx512vl()) {
3149 return false;
3150 }
3151 break;
3152 case Op_CompressBits:
3153 case Op_ExpandBits:
3154 if (!VM_Version::supports_bmi2()) {
3155 return false;
3156 }
3157 break;
3158 case Op_CompressM:
3159 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3160 return false;
3161 }
3162 break;
3163 case Op_ConvF2HF:
3164 case Op_ConvHF2F:
3165 if (!VM_Version::supports_float16()) {
3166 return false;
3167 }
3168 break;
3169 case Op_VectorCastF2HF:
3170 case Op_VectorCastHF2F:
3171 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3172 return false;
3173 }
3174 break;
3175 }
3176 return true; // Match rules are supported by default.
3177 }
3178
3179 //------------------------------------------------------------------------
3180
3181 static inline bool is_pop_count_instr_target(BasicType bt) {
3182 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3183 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3184 }
3185
3186 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3187 return match_rule_supported_vector(opcode, vlen, bt);
3188 }
3189
3190 // Identify extra cases that we might want to provide match rules for vector nodes and
3191 // other intrinsics guarded with vector length (vlen) and element type (bt).
3192 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3193 if (!match_rule_supported(opcode)) {
3194 return false;
3195 }
3196 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3197 // * SSE2 supports 128bit vectors for all types;
3198 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3199 // * AVX2 supports 256bit vectors for all types;
3200 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3201 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3202 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3203 // And MaxVectorSize is taken into account as well.
3204 if (!vector_size_supported(bt, vlen)) {
3205 return false;
3206 }
3207 // Special cases which require vector length follow:
3208 // * implementation limitations
3209 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3210 // * 128bit vroundpd instruction is present only in AVX1
3211 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
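// For example, vlen == 8 with bt == T_FLOAT gives 8 * 4 * 8 = 256 bits.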
3212 switch (opcode) {
3213 case Op_MaxVHF:
3214 case Op_MinVHF:
3215 if (!VM_Version::supports_avx512bw()) {
3216 return false;
3217 }
3218 case Op_AddVHF:
3219 case Op_DivVHF:
3220 case Op_FmaVHF:
3221 case Op_MulVHF:
3222 case Op_SubVHF:
3223 case Op_SqrtVHF:
3224 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3225 return false;
3226 }
3227 if (!VM_Version::supports_avx512_fp16()) {
3228 return false;
3229 }
3230 break;
3231 case Op_AbsVF:
3232 case Op_NegVF:
if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3234 return false; // 512bit vandps and vxorps are not available
3235 }
3236 break;
3237 case Op_AbsVD:
3238 case Op_NegVD:
if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3240 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3241 }
3242 break;
3243 case Op_RotateRightV:
3244 case Op_RotateLeftV:
3245 if (bt != T_INT && bt != T_LONG) {
3246 return false;
3247 } // fallthrough
3248 case Op_MacroLogicV:
3249 if (!VM_Version::supports_evex() ||
3250 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3251 return false;
3252 }
3253 break;
3254 case Op_ClearArray:
3255 case Op_VectorMaskGen:
3256 case Op_VectorCmpMasked:
3257 if (!VM_Version::supports_avx512bw()) {
3258 return false;
3259 }
3260 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3261 return false;
3262 }
3263 break;
3264 case Op_LoadVectorMasked:
3265 case Op_StoreVectorMasked:
3266 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3267 return false;
3268 }
3269 break;
3270 case Op_UMinV:
3271 case Op_UMaxV:
3272 if (UseAVX == 0) {
3273 return false;
3274 }
3275 break;
3276 case Op_MaxV:
3277 case Op_MinV:
3278 if (UseSSE < 4 && is_integral_type(bt)) {
3279 return false;
3280 }
3281 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3282 // Float/Double intrinsics are enabled for AVX family currently.
3283 if (UseAVX == 0) {
3284 return false;
3285 }
3286 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3287 return false;
3288 }
3289 }
3290 break;
3291 case Op_CallLeafVector:
3292 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3293 return false;
3294 }
3295 break;
3296 case Op_AddReductionVI:
3297 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3298 return false;
3299 }
3300 // fallthrough
3301 case Op_AndReductionV:
3302 case Op_OrReductionV:
3303 case Op_XorReductionV:
3304 if (is_subword_type(bt) && (UseSSE < 4)) {
3305 return false;
3306 }
3307 break;
3308 case Op_MinReductionV:
3309 case Op_MaxReductionV:
3310 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3311 return false;
3312 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3313 return false;
3314 }
3315 // Float/Double intrinsics enabled for AVX family.
3316 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3317 return false;
3318 }
3319 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3320 return false;
3321 }
3322 break;
3323 case Op_VectorTest:
3324 if (UseSSE < 4) {
3325 return false; // Implementation limitation
3326 } else if (size_in_bits < 32) {
3327 return false; // Implementation limitation
3328 }
3329 break;
3330 case Op_VectorLoadShuffle:
3331 case Op_VectorRearrange:
if (vlen == 2) {
3333 return false; // Implementation limitation due to how shuffle is loaded
3334 } else if (size_in_bits == 256 && UseAVX < 2) {
3335 return false; // Implementation limitation
3336 }
3337 break;
3338 case Op_VectorLoadMask:
3339 case Op_VectorMaskCast:
3340 if (size_in_bits == 256 && UseAVX < 2) {
3341 return false; // Implementation limitation
3342 }
3343 // fallthrough
3344 case Op_VectorStoreMask:
3345 if (vlen == 2) {
3346 return false; // Implementation limitation
3347 }
3348 break;
3349 case Op_PopulateIndex:
3350 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3351 return false;
3352 }
3353 break;
3354 case Op_VectorCastB2X:
3355 case Op_VectorCastS2X:
3356 case Op_VectorCastI2X:
3357 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3358 return false;
3359 }
3360 break;
3361 case Op_VectorCastL2X:
3362 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3363 return false;
3364 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3365 return false;
3366 }
3367 break;
3368 case Op_VectorCastF2X: {
// As per JLS section 5.1.3, narrowing conversions to sub-word types
// happen after an intermediate conversion to integer, and the special
// handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
3372 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3373 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3374 return false;
3375 }
3376 }
3377 // fallthrough
3378 case Op_VectorCastD2X:
3379 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3380 return false;
3381 }
3382 break;
3383 case Op_VectorCastF2HF:
3384 case Op_VectorCastHF2F:
3385 if (!VM_Version::supports_f16c() &&
3386 ((!VM_Version::supports_evex() ||
3387 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3388 return false;
3389 }
3390 break;
3391 case Op_RoundVD:
3392 if (!VM_Version::supports_avx512dq()) {
3393 return false;
3394 }
3395 break;
3396 case Op_MulReductionVI:
3397 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3398 return false;
3399 }
3400 break;
3401 case Op_LoadVectorGatherMasked:
3402 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3403 return false;
3404 }
3405 if (is_subword_type(bt) &&
3406 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3407 (size_in_bits < 64) ||
3408 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3409 return false;
3410 }
3411 break;
3412 case Op_StoreVectorScatterMasked:
3413 case Op_StoreVectorScatter:
3414 if (is_subword_type(bt)) {
3415 return false;
3416 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3417 return false;
3418 }
3419 // fallthrough
3420 case Op_LoadVectorGather:
3421 if (!is_subword_type(bt) && size_in_bits == 64) {
3422 return false;
3423 }
3424 if (is_subword_type(bt) && size_in_bits < 64) {
3425 return false;
3426 }
3427 break;
3428 case Op_SaturatingAddV:
3429 case Op_SaturatingSubV:
3430 if (UseAVX < 1) {
3431 return false; // Implementation limitation
3432 }
3433 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3434 return false;
3435 }
3436 break;
3437 case Op_SelectFromTwoVector:
3438 if (size_in_bits < 128) {
3439 return false;
3440 }
3441 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3442 return false;
3443 }
3444 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3445 return false;
3446 }
3447 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3448 return false;
3449 }
3450 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3451 return false;
3452 }
3453 break;
3454 case Op_MaskAll:
3455 if (!VM_Version::supports_evex()) {
3456 return false;
3457 }
3458 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3459 return false;
3460 }
3461 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3462 return false;
3463 }
3464 break;
3465 case Op_VectorMaskCmp:
3466 if (vlen < 2 || size_in_bits < 32) {
3467 return false;
3468 }
3469 break;
3470 case Op_CompressM:
3471 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3472 return false;
3473 }
3474 break;
3475 case Op_CompressV:
3476 case Op_ExpandV:
3477 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3478 return false;
3479 }
if (size_in_bits < 128) {
return false;
} // fallthrough
3483 case Op_VectorLongToMask:
3484 if (UseAVX < 1) {
3485 return false;
3486 }
3487 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3488 return false;
3489 }
3490 break;
3491 case Op_SignumVD:
3492 case Op_SignumVF:
3493 if (UseAVX < 1) {
3494 return false;
3495 }
3496 break;
3497 case Op_PopCountVI:
3498 case Op_PopCountVL: {
3499 if (!is_pop_count_instr_target(bt) &&
3500 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3501 return false;
3502 }
3503 }
3504 break;
3505 case Op_ReverseV:
3506 case Op_ReverseBytesV:
3507 if (UseAVX < 2) {
3508 return false;
3509 }
3510 break;
3511 case Op_CountTrailingZerosV:
3512 case Op_CountLeadingZerosV:
3513 if (UseAVX < 2) {
3514 return false;
3515 }
3516 break;
3517 }
return true; // Match rules are supported by default.
3519 }
3520
3521 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
// The ADLC-based match_rule_supported routine checks for the existence of a
// pattern based on the IR opcode. Most unary/binary/ternary masked operations
// share the IR nodes of their non-masked counterparts, with the mask edge
// being the differentiator.
// This routine does a strict check for the existence of masked operation
// patterns by returning false for every opcode other than the ones whose
// masked instruction patterns are defined in this file.
3528 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3529 return false;
3530 }
3531
3532 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3533 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
3536 switch(opcode) {
3537 // Unary masked operations
3538 case Op_AbsVB:
3539 case Op_AbsVS:
if (!VM_Version::supports_avx512bw()) {
return false; // Implementation limitation
} // fallthrough
3543 case Op_AbsVI:
3544 case Op_AbsVL:
3545 return true;
3546
3547 // Ternary masked operations
3548 case Op_FmaVF:
3549 case Op_FmaVD:
3550 return true;
3551
3552 case Op_MacroLogicV:
if (bt != T_INT && bt != T_LONG) {
3554 return false;
3555 }
3556 return true;
3557
3558 // Binary masked operations
3559 case Op_AddVB:
3560 case Op_AddVS:
3561 case Op_SubVB:
3562 case Op_SubVS:
3563 case Op_MulVS:
3564 case Op_LShiftVS:
3565 case Op_RShiftVS:
3566 case Op_URShiftVS:
3567 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3568 if (!VM_Version::supports_avx512bw()) {
3569 return false; // Implementation limitation
3570 }
3571 return true;
3572
3573 case Op_MulVL:
3574 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3575 if (!VM_Version::supports_avx512dq()) {
3576 return false; // Implementation limitation
3577 }
3578 return true;
3579
3580 case Op_AndV:
3581 case Op_OrV:
3582 case Op_XorV:
3583 case Op_RotateRightV:
3584 case Op_RotateLeftV:
3585 if (bt != T_INT && bt != T_LONG) {
3586 return false; // Implementation limitation
3587 }
3588 return true;
3589
3590 case Op_VectorLoadMask:
3591 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3592 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3593 return false;
3594 }
3595 return true;
3596
3597 case Op_AddVI:
3598 case Op_AddVL:
3599 case Op_AddVF:
3600 case Op_AddVD:
3601 case Op_SubVI:
3602 case Op_SubVL:
3603 case Op_SubVF:
3604 case Op_SubVD:
3605 case Op_MulVI:
3606 case Op_MulVF:
3607 case Op_MulVD:
3608 case Op_DivVF:
3609 case Op_DivVD:
3610 case Op_SqrtVF:
3611 case Op_SqrtVD:
3612 case Op_LShiftVI:
3613 case Op_LShiftVL:
3614 case Op_RShiftVI:
3615 case Op_RShiftVL:
3616 case Op_URShiftVI:
3617 case Op_URShiftVL:
3618 case Op_LoadVectorMasked:
3619 case Op_StoreVectorMasked:
3620 case Op_LoadVectorGatherMasked:
3621 case Op_StoreVectorScatterMasked:
3622 return true;
3623
3624 case Op_UMinV:
3625 case Op_UMaxV:
3626 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3627 return false;
3628 } // fallthrough
3629 case Op_MaxV:
3630 case Op_MinV:
3631 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3632 return false; // Implementation limitation
3633 }
3634 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3635 return false; // Implementation limitation
3636 }
3637 return true;
3638 case Op_SaturatingAddV:
3639 case Op_SaturatingSubV:
3640 if (!is_subword_type(bt)) {
3641 return false;
3642 }
3643 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3644 return false; // Implementation limitation
3645 }
3646 return true;
3647
3648 case Op_VectorMaskCmp:
3649 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3650 return false; // Implementation limitation
3651 }
3652 return true;
3653
3654 case Op_VectorRearrange:
3655 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3656 return false; // Implementation limitation
3657 }
3658 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3659 return false; // Implementation limitation
3660 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3661 return false; // Implementation limitation
3662 }
3663 return true;
3664
3665 // Binary Logical operations
3666 case Op_AndVMask:
3667 case Op_OrVMask:
3668 case Op_XorVMask:
3669 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3670 return false; // Implementation limitation
3671 }
3672 return true;
3673
3674 case Op_PopCountVI:
3675 case Op_PopCountVL:
3676 if (!is_pop_count_instr_target(bt)) {
3677 return false;
3678 }
3679 return true;
3680
3681 case Op_MaskAll:
3682 return true;
3683
3684 case Op_CountLeadingZerosV:
3685 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3686 return true;
3687 }
3688 default:
3689 return false;
3690 }
3691 }
3692
3693 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3694 return false;
3695 }
3696
3697 // Return true if Vector::rearrange needs preparation of the shuffle argument
3698 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3699 switch (elem_bt) {
3700 case T_BYTE: return false;
3701 case T_SHORT: return !VM_Version::supports_avx512bw();
3702 case T_INT: return !VM_Version::supports_avx();
3703 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3704 default:
3705 ShouldNotReachHere();
3706 return false;
3707 }
3708 }
3709
3710 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3711 // Prefer predicate if the mask type is "TypeVectMask".
3712 return vt->isa_vectmask() != nullptr;
3713 }
3714
3715 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3716 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3717 bool legacy = (generic_opnd->opcode() == LEGVEC);
3718 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3719 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3720 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3721 return new legVecZOper();
3722 }
3723 if (legacy) {
3724 switch (ideal_reg) {
3725 case Op_VecS: return new legVecSOper();
3726 case Op_VecD: return new legVecDOper();
3727 case Op_VecX: return new legVecXOper();
3728 case Op_VecY: return new legVecYOper();
3729 case Op_VecZ: return new legVecZOper();
3730 }
3731 } else {
3732 switch (ideal_reg) {
3733 case Op_VecS: return new vecSOper();
3734 case Op_VecD: return new vecDOper();
3735 case Op_VecX: return new vecXOper();
3736 case Op_VecY: return new vecYOper();
3737 case Op_VecZ: return new vecZOper();
3738 }
3739 }
3740 ShouldNotReachHere();
3741 return nullptr;
3742 }
3743
3744 bool Matcher::is_reg2reg_move(MachNode* m) {
3745 switch (m->rule()) {
3746 case MoveVec2Leg_rule:
3747 case MoveLeg2Vec_rule:
3748 case MoveF2VL_rule:
3749 case MoveF2LEG_rule:
3750 case MoveVL2F_rule:
3751 case MoveLEG2F_rule:
3752 case MoveD2VL_rule:
3753 case MoveD2LEG_rule:
3754 case MoveVL2D_rule:
3755 case MoveLEG2D_rule:
3756 return true;
3757 default:
3758 return false;
3759 }
3760 }
3761
3762 bool Matcher::is_generic_vector(MachOper* opnd) {
3763 switch (opnd->opcode()) {
3764 case VEC:
3765 case LEGVEC:
3766 return true;
3767 default:
3768 return false;
3769 }
3770 }
3771
3772 //------------------------------------------------------------------------
3773
3774 const RegMask* Matcher::predicate_reg_mask(void) {
3775 return &_VECTMASK_REG_mask;
3776 }
3777
3778 // Max vector size in bytes. 0 if not supported.
3779 int Matcher::vector_width_in_bytes(BasicType bt) {
3780 assert(is_java_primitive(bt), "only primitive type vectors");
3781 // SSE2 supports 128bit vectors for all types.
3782 // AVX2 supports 256bit vectors for all types.
// AVX512 (EVEX) supports 512bit vectors for all types.
3784 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3785 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3786 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3787 size = (UseAVX > 2) ? 64 : 32;
3788 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3789 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3790 // Use flag to limit vector size.
3791 size = MIN2(size,(int)MaxVectorSize);
3792 // Minimum 2 values in vector (or 4 for bytes).
3793 switch (bt) {
3794 case T_DOUBLE:
3795 case T_LONG:
3796 if (size < 16) return 0;
3797 break;
3798 case T_FLOAT:
3799 case T_INT:
3800 if (size < 8) return 0;
3801 break;
3802 case T_BOOLEAN:
3803 if (size < 4) return 0;
3804 break;
3805 case T_CHAR:
3806 if (size < 4) return 0;
3807 break;
3808 case T_BYTE:
3809 if (size < 4) return 0;
3810 break;
3811 case T_SHORT:
3812 if (size < 4) return 0;
3813 break;
3814 default:
3815 ShouldNotReachHere();
3816 }
3817 return size;
3818 }
3819
3820 // Limits on vector size (number of elements) loaded into vector.
3821 int Matcher::max_vector_size(const BasicType bt) {
3822 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3823 }
3824 int Matcher::min_vector_size(const BasicType bt) {
3825 int max_size = max_vector_size(bt);
3826 // Min size which can be loaded into vector is 4 bytes.
3827 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
// Support SVML calls by allowing single-element double vectors.
3829 if (bt == T_DOUBLE) {
3830 size = 1;
3831 }
3832 return MIN2(size,max_size);
3833 }
3834
3835 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3836 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3837 // by default on Cascade Lake
3838 if (VM_Version::is_default_intel_cascade_lake()) {
3839 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3840 }
3841 return Matcher::max_vector_size(bt);
3842 }
3843
3844 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3845 return -1;
3846 }
3847
3848 // Vector ideal reg corresponding to specified size in bytes
3849 uint Matcher::vector_ideal_reg(int size) {
3850 assert(MaxVectorSize >= size, "");
3851 switch(size) {
3852 case 4: return Op_VecS;
3853 case 8: return Op_VecD;
3854 case 16: return Op_VecX;
3855 case 32: return Op_VecY;
3856 case 64: return Op_VecZ;
3857 }
3858 ShouldNotReachHere();
3859 return 0;
3860 }
3861
// Check for a shift by a small constant (<= 3, i.e. x86 scale 1/2/4/8) as well
3863 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3864 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3865 shift->in(2)->get_int() <= 3 &&
3866 // Are there other uses besides address expressions?
3867 !matcher->is_visited(shift)) {
3868 address_visited.set(shift->_idx); // Flag as address_visited
3869 mstack.push(shift->in(2), Matcher::Visit);
3870 Node *conv = shift->in(1);
// Allow the Matcher to match the rule which bypasses the
// ConvI2L operation for an array index on LP64
// if the index value is positive.
3874 if (conv->Opcode() == Op_ConvI2L &&
3875 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3876 // Are there other uses besides address expressions?
3877 !matcher->is_visited(conv)) {
3878 address_visited.set(conv->_idx); // Flag as address_visited
3879 mstack.push(conv->in(1), Matcher::Pre_Visit);
3880 } else {
3881 mstack.push(conv, Matcher::Pre_Visit);
3882 }
3883 return true;
3884 }
3885 return false;
3886 }
3887
3888 // This function identifies sub-graphs in which a 'load' node is
3889 // input to two different nodes, and such that it can be matched
3890 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3892 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3893 // refers to the same node.
3894 //
3895 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3896 // This is a temporary solution until we make DAGs expressible in ADL.
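//
// For example (illustrative): x & (x - 1), which clears the lowest set bit,
// maps to blsr, and x & -x, which isolates it, maps to blsi; in both shapes
// the same load feeds two different nodes, which is what is matched below.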
3897 template<typename ConType>
3898 class FusedPatternMatcher {
3899 Node* _op1_node;
3900 Node* _mop_node;
3901 int _con_op;
3902
3903 static int match_next(Node* n, int next_op, int next_op_idx) {
3904 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3905 return -1;
3906 }
3907
3908 if (next_op_idx == -1) { // n is commutative, try rotations
3909 if (n->in(1)->Opcode() == next_op) {
3910 return 1;
3911 } else if (n->in(2)->Opcode() == next_op) {
3912 return 2;
3913 }
3914 } else {
3915 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3916 if (n->in(next_op_idx)->Opcode() == next_op) {
3917 return next_op_idx;
3918 }
3919 }
3920 return -1;
3921 }
3922
3923 public:
3924 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3925 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3926
3927 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3928 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3929 typename ConType::NativeType con_value) {
3930 if (_op1_node->Opcode() != op1) {
3931 return false;
3932 }
3933 if (_mop_node->outcnt() > 2) {
3934 return false;
3935 }
3936 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3937 if (op1_op2_idx == -1) {
3938 return false;
3939 }
3940 // Memory operation must be the other edge
3941 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3942
3943 // Check that the mop node is really what we want
3944 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3945 Node* op2_node = _op1_node->in(op1_op2_idx);
3946 if (op2_node->outcnt() > 1) {
3947 return false;
3948 }
3949 assert(op2_node->Opcode() == op2, "Should be");
3950 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3951 if (op2_con_idx == -1) {
3952 return false;
3953 }
3954 // Memory operation must be the other edge
3955 int op2_mop_idx = (op2_con_idx & 1) + 1;
3956 // Check that the memory operation is the same node
3957 if (op2_node->in(op2_mop_idx) == _mop_node) {
3958 // Now check the constant
3959 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
3960 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
3961 return true;
3962 }
3963 }
3964 }
3965 return false;
3966 }
3967 };
3968
3969 static bool is_bmi_pattern(Node* n, Node* m) {
3970 assert(UseBMI1Instructions, "sanity");
3971 if (n != nullptr && m != nullptr) {
3972 if (m->Opcode() == Op_LoadI) {
3973 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
3974 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
3975 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
3976 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
3977 } else if (m->Opcode() == Op_LoadL) {
3978 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
3979 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
3980 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
3981 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
3982 }
3983 }
3984 return false;
3985 }
3986
3987 // Should the matcher clone input 'm' of node 'n'?
3988 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
3989 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
3990 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
3991 mstack.push(m, Visit);
3992 return true;
3993 }
3994 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
3995 mstack.push(m, Visit); // m = ShiftCntV
3996 return true;
3997 }
3998 if (is_encode_and_store_pattern(n, m)) {
3999 mstack.push(m, Visit);
4000 return true;
4001 }
4002 return false;
4003 }
4004
4005 // Should the Matcher clone shifts on addressing modes, expecting them
4006 // to be subsumed into complex addressing expressions or compute them
4007 // into registers?
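// For example, ptr + (idx << 2) + 16 folds into a single x86 addressing mode
// [base + index*4 + disp], so the shift and the constant offset are cheaper
// to clone into each use than to compute into registers.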
4008 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4009 Node *off = m->in(AddPNode::Offset);
4010 if (off->is_Con()) {
4011 address_visited.test_set(m->_idx); // Flag as address_visited
4012 Node *adr = m->in(AddPNode::Address);
4013
4014 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4015 // AtomicAdd is not an addressing expression.
4016 // Cheap to find it by looking for screwy base.
4017 if (adr->is_AddP() &&
4018 !adr->in(AddPNode::Base)->is_top() &&
4019 !adr->in(AddPNode::Offset)->is_Con() &&
4020 off->get_long() == (int) (off->get_long()) && // immL32
4021 // Are there other uses besides address expressions?
4022 !is_visited(adr)) {
4023 address_visited.set(adr->_idx); // Flag as address_visited
4024 Node *shift = adr->in(AddPNode::Offset);
4025 if (!clone_shift(shift, this, mstack, address_visited)) {
4026 mstack.push(shift, Pre_Visit);
4027 }
4028 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4029 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4030 } else {
4031 mstack.push(adr, Pre_Visit);
4032 }
4033
4034 // Clone X+offset as it also folds into most addressing expressions
4035 mstack.push(off, Visit);
4036 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4037 return true;
4038 } else if (clone_shift(off, this, mstack, address_visited)) {
4039 address_visited.test_set(m->_idx); // Flag as address_visited
4040 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4041 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4042 return true;
4043 }
4044 return false;
4045 }
4046
4047 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
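// Signed and unsigned tests map to the same predicate encoding here; the
// signedness is expected to come from the instruction selected (e.g.
// vpcmpq vs. vpcmpuq).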
4048 switch (bt) {
4049 case BoolTest::eq:
4050 return Assembler::eq;
4051 case BoolTest::ne:
4052 return Assembler::neq;
4053 case BoolTest::le:
4054 case BoolTest::ule:
4055 return Assembler::le;
4056 case BoolTest::ge:
4057 case BoolTest::uge:
4058 return Assembler::nlt;
4059 case BoolTest::lt:
4060 case BoolTest::ult:
4061 return Assembler::lt;
4062 case BoolTest::gt:
4063 case BoolTest::ugt:
4064 return Assembler::nle;
default: ShouldNotReachHere(); return Assembler::_false;
4066 }
4067 }
4068
4069 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4070 switch (bt) {
4071 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4072 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4073 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4074 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4075 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4076 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4077 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4078 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4079 }
4080 }
4081
4082 // Helper methods for MachSpillCopyNode::implementation().
4083 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4084 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4085 assert(ireg == Op_VecS || // 32bit vector
4086 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4087 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4088 "no non-adjacent vector moves" );
4089 if (masm) {
4090 switch (ireg) {
4091 case Op_VecS: // copy whole register
4092 case Op_VecD:
4093 case Op_VecX:
4094 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4095 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4096 } else {
4097 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4098 }
4099 break;
4100 case Op_VecY:
4101 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4102 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4103 } else {
4104 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4105 }
4106 break;
4107 case Op_VecZ:
4108 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4109 break;
4110 default:
4111 ShouldNotReachHere();
4112 }
4113 #ifndef PRODUCT
4114 } else {
4115 switch (ireg) {
4116 case Op_VecS:
4117 case Op_VecD:
4118 case Op_VecX:
4119 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4120 break;
4121 case Op_VecY:
4122 case Op_VecZ:
4123 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4124 break;
4125 default:
4126 ShouldNotReachHere();
4127 }
4128 #endif
4129 }
4130 }
4131
4132 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4133 int stack_offset, int reg, uint ireg, outputStream* st) {
4134 if (masm) {
4135 if (is_load) {
4136 switch (ireg) {
4137 case Op_VecS:
4138 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4139 break;
4140 case Op_VecD:
4141 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4142 break;
4143 case Op_VecX:
4144 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4145 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4146 } else {
4147 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4148 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4149 }
4150 break;
4151 case Op_VecY:
4152 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4153 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4154 } else {
4155 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4156 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4157 }
4158 break;
4159 case Op_VecZ:
4160 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4161 break;
4162 default:
4163 ShouldNotReachHere();
4164 }
4165 } else { // store
4166 switch (ireg) {
4167 case Op_VecS:
4168 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4169 break;
4170 case Op_VecD:
4171 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4172 break;
4173 case Op_VecX:
4174 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4175 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4176 }
4177 else {
4178 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4179 }
4180 break;
4181 case Op_VecY:
4182 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4183 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4184 }
4185 else {
4186 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4187 }
4188 break;
4189 case Op_VecZ:
4190 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4191 break;
4192 default:
4193 ShouldNotReachHere();
4194 }
4195 }
4196 #ifndef PRODUCT
4197 } else {
4198 if (is_load) {
4199 switch (ireg) {
4200 case Op_VecS:
4201 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4202 break;
4203 case Op_VecD:
4204 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4205 break;
4206 case Op_VecX:
4207 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4208 break;
4209 case Op_VecY:
4210 case Op_VecZ:
4211 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4212 break;
4213 default:
4214 ShouldNotReachHere();
4215 }
4216 } else { // store
4217 switch (ireg) {
4218 case Op_VecS:
4219 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4220 break;
4221 case Op_VecD:
4222 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4223 break;
4224 case Op_VecX:
4225 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4226 break;
4227 case Op_VecY:
4228 case Op_VecZ:
4229 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4230 break;
4231 default:
4232 ShouldNotReachHere();
4233 }
4234 }
4235 #endif
4236 }
4237 }
4238
4239 template <class T>
4240 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4241 int size = type2aelembytes(bt) * len;
4242 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4243 for (int i = 0; i < len; i++) {
4244 int offset = i * type2aelembytes(bt);
4245 switch (bt) {
4246 case T_BYTE: val->at(i) = con; break;
4247 case T_SHORT: {
4248 jshort c = con;
4249 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4250 break;
4251 }
4252 case T_INT: {
4253 jint c = con;
4254 memcpy(val->adr_at(offset), &c, sizeof(jint));
4255 break;
4256 }
4257 case T_LONG: {
4258 jlong c = con;
4259 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4260 break;
4261 }
4262 case T_FLOAT: {
4263 jfloat c = con;
4264 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4265 break;
4266 }
4267 case T_DOUBLE: {
4268 jdouble c = con;
4269 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4270 break;
4271 }
4272 default: assert(false, "%s", type2name(bt));
4273 }
4274 }
4275 return val;
4276 }
4277
4278 static inline jlong high_bit_set(BasicType bt) {
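// The sign bit replicated across every lane of the given type, e.g. each
// 16-bit lane contributes 0x8000 for T_SHORT.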
4279 switch (bt) {
4280 case T_BYTE: return 0x8080808080808080;
4281 case T_SHORT: return 0x8000800080008000;
4282 case T_INT: return 0x8000000080000000;
4283 case T_LONG: return 0x8000000000000000;
4284 default:
4285 ShouldNotReachHere();
4286 return 0;
4287 }
4288 }
4289
4290 #ifndef PRODUCT
4291 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4292 st->print("nop \t# %d bytes pad for loops and calls", _count);
4293 }
4294 #endif
4295
4296 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4297 __ nop(_count);
4298 }
4299
4300 uint MachNopNode::size(PhaseRegAlloc*) const {
4301 return _count;
4302 }
4303
4304 #ifndef PRODUCT
4305 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4306 st->print("# breakpoint");
4307 }
4308 #endif
4309
4310 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4311 __ int3();
4312 }
4313
4314 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4315 return MachNode::size(ra_);
4316 }
4317
4318 %}
4319
4320 //----------ENCODING BLOCK-----------------------------------------------------
4321 // This block specifies the encoding classes used by the compiler to
4322 // output byte streams. Encoding classes are parameterized macros
4323 // used by Machine Instruction Nodes in order to generate the bit
4324 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
4327 // COND_INTER. REG_INTER causes an operand to generate a function
4328 // which returns its register number when queried. CONST_INTER causes
4329 // an operand to generate a function which returns the value of the
4330 // constant when queried. MEMORY_INTER causes an operand to generate
4331 // four functions which return the Base Register, the Index Register,
4332 // the Scale Value, and the Offset Value of the operand when queried.
4333 // COND_INTER causes an operand to generate six functions which return
4334 // the encoding code (ie - encoding bits for the instruction)
4335 // associated with each basic boolean condition for a conditional
4336 // instruction.
4337 //
4338 // Instructions specify two basic values for encoding. Again, a
4339 // function is available to check if the constant displacement is an
4340 // oop. They use the ins_encode keyword to specify their encoding
4341 // classes (which must be a sequence of enc_class names, and their
4342 // parameters, specified in the encoding block), and they use the
4343 // opcode keyword to specify, in order, their primary, secondary, and
4344 // tertiary opcode. Only the opcode sections which a particular
4345 // instruction needs for encoding need to be specified.
4346 encode %{
4347 enc_class cdql_enc(no_rax_rdx_RegI div)
4348 %{
4349 // Full implementation of Java idiv and irem; checks for
4350 // special case as described in JVM spec., p.243 & p.271.
4351 //
4352 // normal case special case
4353 //
4354 // input : rax: dividend min_int
4355 // reg: divisor -1
4356 //
4357 // output: rax: quotient (= rax idiv reg) min_int
4358 // rdx: remainder (= rax irem reg) 0
4359 //
// Code sequence:
4361 //
4362 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4363 // 5: 75 07/08 jne e <normal>
4364 // 7: 33 d2 xor %edx,%edx
4365 // [div >= 8 -> offset + 1]
4366 // [REX_B]
4367 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4368 // c: 74 03/04 je 11 <done>
4369 // 000000000000000e <normal>:
4370 // e: 99 cltd
4371 // [div >= 8 -> offset + 1]
4372 // [REX_B]
4373 // f: f7 f9 idiv $div
4374 // 0000000000000011 <done>:
4375 Label normal;
4376 Label done;
4377
4378 // cmp $0x80000000,%eax
4379 __ cmpl(as_Register(RAX_enc), 0x80000000);
4380
4381 // jne e <normal>
4382 __ jccb(Assembler::notEqual, normal);
4383
4384 // xor %edx,%edx
4385 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4386
// cmp $0xffffffffffffffff,$div
4388 __ cmpl($div$$Register, -1);
4389
4390 // je 11 <done>
4391 __ jccb(Assembler::equal, done);
4392
4393 // <normal>
4394 // cltd
4395 __ bind(normal);
4396 __ cdql();
4397
4398 // idivl
4399 // <done>
4400 __ idivl($div$$Register);
4401 __ bind(done);
4402 %}
4403
4404 enc_class cdqq_enc(no_rax_rdx_RegL div)
4405 %{
4406 // Full implementation of Java ldiv and lrem; checks for
4407 // special case as described in JVM spec., p.243 & p.271.
4408 //
4409 // normal case special case
4410 //
4411 // input : rax: dividend min_long
4412 // reg: divisor -1
4413 //
4414 // output: rax: quotient (= rax idiv reg) min_long
4415 // rdx: remainder (= rax irem reg) 0
4416 //
// Code sequence:
4418 //
4419 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4420 // 7: 00 00 80
4421 // a: 48 39 d0 cmp %rdx,%rax
4422 // d: 75 08 jne 17 <normal>
4423 // f: 33 d2 xor %edx,%edx
4424 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4425 // 15: 74 05 je 1c <done>
4426 // 0000000000000017 <normal>:
4427 // 17: 48 99 cqto
4428 // 19: 48 f7 f9 idiv $div
4429 // 000000000000001c <done>:
4430 Label normal;
4431 Label done;
4432
4433 // mov $0x8000000000000000,%rdx
4434 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4435
4436 // cmp %rdx,%rax
4437 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4438
4439 // jne 17 <normal>
4440 __ jccb(Assembler::notEqual, normal);
4441
4442 // xor %edx,%edx
4443 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4444
4445 // cmp $0xffffffffffffffff,$div
4446 __ cmpq($div$$Register, -1);
4447
    // je 1c <done>
4449 __ jccb(Assembler::equal, done);
4450
4451 // <normal>
4452 // cqto
4453 __ bind(normal);
4454 __ cdqq();
4455
    // idivq
    // <done>
4458 __ idivq($div$$Register);
4459 __ bind(done);
4460 %}
4461
4462 enc_class clear_avx %{
4463 DEBUG_ONLY(int off0 = __ offset());
4464 if (generate_vzeroupper(Compile::current())) {
      // Clear the upper bits of the YMM registers when the current compiled
      // code uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during the call.
4468 __ vzeroupper();
4469 }
4470 DEBUG_ONLY(int off1 = __ offset());
4471 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4472 %}
4473
4474 enc_class Java_To_Runtime(method meth) %{
4475 __ lea(r10, RuntimeAddress((address)$meth$$method));
4476 __ call(r10);
4477 __ post_call_nop();
4478 %}
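
  // (The indirect call through r10 is used because the runtime target may
  // lie outside the +/-2GB reach of a direct 32-bit call displacement from
  // the code cache.)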
4479
4480 enc_class Java_Static_Call(method meth)
4481 %{
4482 // JAVA STATIC CALL
4483 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4484 // determine who we intended to call.
4485 if (!_method) {
4486 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4487 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4488 // The NOP here is purely to ensure that eliding a call to
4489 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4490 __ addr_nop_5();
4491 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4492 } else {
4493 int method_index = resolved_method_index(masm);
4494 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4495 : static_call_Relocation::spec(method_index);
4496 address mark = __ pc();
4497 int call_offset = __ offset();
4498 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4499 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4500 // Calls of the same statically bound method can share
4501 // a stub to the interpreter.
4502 __ code()->shared_stub_to_interp_for(_method, call_offset);
4503 } else {
        // Emit stub for static call.
4505 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4506 __ clear_inst_mark();
4507 if (stub == nullptr) {
4508 ciEnv::current()->record_failure("CodeCache is full");
4509 return;
4510 }
4511 }
4512 }
4513 __ post_call_nop();
4514 %}
4515
4516 enc_class Java_Dynamic_Call(method meth) %{
4517 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4518 __ post_call_nop();
4519 %}
4520
4521 enc_class call_epilog %{
4522 if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find the magic cookie on the stack
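      // (the method prologue stores this cookie in the frame when
      // VerifyStackAtCalls is set, so a mismatch here means the callee
      // unbalanced the stack)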
4524 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4525 Label L;
4526 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4527 __ jccb(Assembler::equal, L);
4528 // Die if stack mismatch
4529 __ int3();
4530 __ bind(L);
4531 }
4532 %}
4533
4534 %}
4535
4536 //----------FRAME--------------------------------------------------------------
4537 // Definition of frame structure and management information.
4538 //
4539 // S T A C K L A Y O U T Allocators stack-slot number
4540 // | (to get allocators register number
4541 // G Owned by | | v add OptoReg::stack0())
4542 // r CALLER | |
4543 // o | +--------+ pad to even-align allocators stack-slot
4544 // w V | pad0 | numbers; owned by CALLER
4545 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4546 // h ^ | in | 5
4547 // | | args | 4 Holes in incoming args owned by SELF
4548 // | | | | 3
4549 // | | +--------+
4550 // V | | old out| Empty on Intel, window on Sparc
4551 // | old |preserve| Must be even aligned.
4552 // | SP-+--------+----> Matcher::_old_SP, even aligned
4553 // | | in | 3 area for Intel ret address
4554 // Owned by |preserve| Empty on Sparc.
4555 // SELF +--------+
4556 // | | pad2 | 2 pad to align old SP
4557 // | +--------+ 1
4558 // | | locks | 0
4559 // | +--------+----> OptoReg::stack0(), even aligned
4560 // | | pad1 | 11 pad to align new SP
4561 // | +--------+
4562 // | | | 10
4563 // | | spills | 9 spills
4564 // V | | 8 (pad0 slot for callee)
4565 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4566 // ^ | out | 7
4567 // | | args | 6 Holes in outgoing args owned by CALLEE
4568 // Owned by +--------+
4569 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4570 // | new |preserve| Must be even-aligned.
4571 // | SP-+--------+----> Matcher::_new_SP, even aligned
4572 // | | |
4573 //
4574 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4575 // known from SELF's arguments and the Java calling convention.
4576 // Region 6-7 is determined per call site.
4577 // Note 2: If the calling convention leaves holes in the incoming argument
4578 // area, those holes are owned by SELF. Holes in the outgoing area
4579 // are owned by the CALLEE. Holes should not be necessary in the
4580 // incoming area, as the Java calling convention is completely under
4581 // the control of the AD file. Doubles can be sorted and packed to
4582 // avoid holes. Holes in the outgoing arguments may be necessary for
4583 // varargs C calling conventions.
4584 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4585 // even aligned with pad0 as needed.
4586 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4587 // region 6-11 is even aligned; it may be padded out more so that
4588 // the region from SP to FP meets the minimum stack alignment.
4589 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4590 // alignment. Region 11, pad1, may be dynamically extended so that
4591 // SP meets the minimum alignment.
4592
4593 frame
4594 %{
4595 // These three registers define part of the calling convention
4596 // between compiled code and the interpreter.
4597 inline_cache_reg(RAX); // Inline Cache Register
4598
4599 // Optional: name the operand used by cisc-spilling to access
4600 // [stack_pointer + offset]
4601 cisc_spilling_operand_name(indOffset32);
4602
4603 // Number of stack slots consumed by locking an object
4604 sync_stack_slots(2);
4605
4606 // Compiled code's Frame Pointer
4607 frame_pointer(RSP);
4608
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2C adapters, which convert from interpreted
  // Java to compiled Java.
4612 interpreter_frame_pointer(RBP);
4613
4614 // Stack alignment requirement
4615 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4616
4617 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4618 // for calls to C. Supports the var-args backing area for register parms.
4619 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4620
4621 // The after-PROLOG location of the return address. Location of
4622 // return address specifies a type (REG or STACK) and a number
4623 // representing the register number (i.e. - use a register name) or
4624 // stack slot.
  // The return address is on the stack in slot 0 if there are no locks,
  // verification, or alignment words. Otherwise, it is above the lock and
  // verification slots and the alignment word.
4627 return_addr(STACK - 2 +
4628 align_up((Compile::current()->in_preserve_stack_slots() +
4629 Compile::current()->fixed_slots()),
4630 stack_alignment_in_slots()));
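  // (The return address occupies two 32-bit stack slots, one 64-bit word,
  // which is what the "- 2" above accounts for.)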
4631
4632 // Location of compiled Java return values. Same as C for now.
4633 return_value
4634 %{
4635 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4636 "only return normal values");
4637
4638 static const int lo[Op_RegL + 1] = {
4639 0,
4640 0,
4641 RAX_num, // Op_RegN
4642 RAX_num, // Op_RegI
4643 RAX_num, // Op_RegP
4644 XMM0_num, // Op_RegF
4645 XMM0_num, // Op_RegD
4646 RAX_num // Op_RegL
4647 };
4648 static const int hi[Op_RegL + 1] = {
4649 0,
4650 0,
4651 OptoReg::Bad, // Op_RegN
4652 OptoReg::Bad, // Op_RegI
4653 RAX_H_num, // Op_RegP
4654 OptoReg::Bad, // Op_RegF
4655 XMM0b_num, // Op_RegD
4656 RAX_H_num // Op_RegL
4657 };
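
    // For example, an int result (Op_RegI) is returned in RAX alone
    // (hi == OptoReg::Bad), a long (Op_RegL) in the RAX:RAX_H pair, and
    // a double (Op_RegD) in the XMM0:XMM0b pair.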
    // Excludes the flags and vector registers.
4659 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4660 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4661 %}
4662 %}
4663
4664 //----------ATTRIBUTES---------------------------------------------------------
4665 //----------Operand Attributes-------------------------------------------------
4666 op_attrib op_cost(0); // Required cost attribute
4667
4668 //----------Instruction Attributes---------------------------------------------
4669 ins_attrib ins_cost(100); // Required cost attribute
4670 ins_attrib ins_size(8); // Required size attribute (in bits)
4671 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4672 // a non-matching short branch variant
4673 // of some long branch?
4674 ins_attrib ins_alignment(1); // Required alignment attribute (must
4675 // be a power of 2) specifies the
4676 // alignment that some part of the
4677 // instruction (not necessarily the
4678 // start) requires. If > 1, a
4679 // compute_padding() function must be
4680 // provided for the instruction
4681
4682 // Whether this node is expanded during code emission into a sequence of
4683 // instructions and the first instruction can perform an implicit null check.
4684 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4685
4686 //----------OPERANDS-----------------------------------------------------------
4687 // Operand definitions must precede instruction definitions for correct parsing
4688 // in the ADLC because operands constitute user defined types which are used in
4689 // instruction definitions.
4690
4691 //----------Simple Operands----------------------------------------------------
4692 // Immediate Operands
4693 // Integer Immediate
4694 operand immI()
4695 %{
4696 match(ConI);
4697
4698 op_cost(10);
4699 format %{ %}
4700 interface(CONST_INTER);
4701 %}
4702
4703 // Constant for test vs zero
4704 operand immI_0()
4705 %{
4706 predicate(n->get_int() == 0);
4707 match(ConI);
4708
4709 op_cost(0);
4710 format %{ %}
4711 interface(CONST_INTER);
4712 %}
4713
4714 // Constant for increment
4715 operand immI_1()
4716 %{
4717 predicate(n->get_int() == 1);
4718 match(ConI);
4719
4720 op_cost(0);
4721 format %{ %}
4722 interface(CONST_INTER);
4723 %}
4724
4725 // Constant for decrement
4726 operand immI_M1()
4727 %{
4728 predicate(n->get_int() == -1);
4729 match(ConI);
4730
4731 op_cost(0);
4732 format %{ %}
4733 interface(CONST_INTER);
4734 %}
4735
4736 operand immI_2()
4737 %{
4738 predicate(n->get_int() == 2);
4739 match(ConI);
4740
4741 op_cost(0);
4742 format %{ %}
4743 interface(CONST_INTER);
4744 %}
4745
4746 operand immI_4()
4747 %{
4748 predicate(n->get_int() == 4);
4749 match(ConI);
4750
4751 op_cost(0);
4752 format %{ %}
4753 interface(CONST_INTER);
4754 %}
4755
4756 operand immI_8()
4757 %{
4758 predicate(n->get_int() == 8);
4759 match(ConI);
4760
4761 op_cost(0);
4762 format %{ %}
4763 interface(CONST_INTER);
4764 %}
4765
4766 // Valid scale values for addressing modes
4767 operand immI2()
4768 %{
4769 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4770 match(ConI);
4771
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
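
// (A scale value of 0..3 selects an index multiplier of 1, 2, 4, or 8 in
// the SIB byte; the multiplier is 1 << scale.)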
4775
4776 operand immU7()
4777 %{
4778 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4779 match(ConI);
4780
4781 op_cost(5);
4782 format %{ %}
4783 interface(CONST_INTER);
4784 %}
4785
4786 operand immI8()
4787 %{
4788 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4789 match(ConI);
4790
4791 op_cost(5);
4792 format %{ %}
4793 interface(CONST_INTER);
4794 %}
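
// (x86 sign-extends 8-bit immediates, so ALU operations with constants in
// [-128, 127] can use the one-byte imm8 encoding instead of a four-byte
// imm32.)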
4795
4796 operand immU8()
4797 %{
4798 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4799 match(ConI);
4800
4801 op_cost(5);
4802 format %{ %}
4803 interface(CONST_INTER);
4804 %}
4805
4806 operand immI16()
4807 %{
4808 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4809 match(ConI);
4810
4811 op_cost(10);
4812 format %{ %}
4813 interface(CONST_INTER);
4814 %}
4815
4816 // Int Immediate non-negative
4817 operand immU31()
4818 %{
4819 predicate(n->get_int() >= 0);
4820 match(ConI);
4821
4822 op_cost(0);
4823 format %{ %}
4824 interface(CONST_INTER);
4825 %}
4826
4827 // Pointer Immediate
4828 operand immP()
4829 %{
4830 match(ConP);
4831
4832 op_cost(10);
4833 format %{ %}
4834 interface(CONST_INTER);
4835 %}
4836
4837 // Null Pointer Immediate
4838 operand immP0()
4839 %{
4840 predicate(n->get_ptr() == 0);
4841 match(ConP);
4842
4843 op_cost(5);
4844 format %{ %}
4845 interface(CONST_INTER);
4846 %}
4847
// Narrow Pointer Immediate
4849 operand immN() %{
4850 match(ConN);
4851
4852 op_cost(10);
4853 format %{ %}
4854 interface(CONST_INTER);
4855 %}
4856
4857 operand immNKlass() %{
4858 match(ConNKlass);
4859
4860 op_cost(10);
4861 format %{ %}
4862 interface(CONST_INTER);
4863 %}
4864
// Narrow Null Pointer Immediate
4866 operand immN0() %{
4867 predicate(n->get_narrowcon() == 0);
4868 match(ConN);
4869
4870 op_cost(5);
4871 format %{ %}
4872 interface(CONST_INTER);
4873 %}
4874
4875 operand immP31()
4876 %{
4877 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4878 && (n->get_ptr() >> 31) == 0);
4879 match(ConP);
4880
4881 op_cost(5);
4882 format %{ %}
4883 interface(CONST_INTER);
4884 %}
4885
4886
4887 // Long Immediate
4888 operand immL()
4889 %{
4890 match(ConL);
4891
4892 op_cost(20);
4893 format %{ %}
4894 interface(CONST_INTER);
4895 %}
4896
4897 // Long Immediate 8-bit
4898 operand immL8()
4899 %{
4900 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4901 match(ConL);
4902
4903 op_cost(5);
4904 format %{ %}
4905 interface(CONST_INTER);
4906 %}
4907
4908 // Long Immediate 32-bit unsigned
4909 operand immUL32()
4910 %{
4911 predicate(n->get_long() == (unsigned int) (n->get_long()));
4912 match(ConL);
4913
4914 op_cost(10);
4915 format %{ %}
4916 interface(CONST_INTER);
4917 %}
4918
4919 // Long Immediate 32-bit signed
4920 operand immL32()
4921 %{
4922 predicate(n->get_long() == (int) (n->get_long()));
4923 match(ConL);
4924
4925 op_cost(15);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immL_Pow2()
4931 %{
4932 predicate(is_power_of_2((julong)n->get_long()));
4933 match(ConL);
4934
4935 op_cost(15);
4936 format %{ %}
4937 interface(CONST_INTER);
4938 %}
4939
4940 operand immL_NotPow2()
4941 %{
4942 predicate(is_power_of_2((julong)~n->get_long()));
4943 match(ConL);
4944
4945 op_cost(15);
4946 format %{ %}
4947 interface(CONST_INTER);
4948 %}
4949
4950 // Long Immediate zero
4951 operand immL0()
4952 %{
4953 predicate(n->get_long() == 0L);
4954 match(ConL);
4955
4956 op_cost(10);
4957 format %{ %}
4958 interface(CONST_INTER);
4959 %}
4960
4961 // Constant for increment
4962 operand immL1()
4963 %{
4964 predicate(n->get_long() == 1);
4965 match(ConL);
4966
4967 format %{ %}
4968 interface(CONST_INTER);
4969 %}
4970
4971 // Constant for decrement
4972 operand immL_M1()
4973 %{
4974 predicate(n->get_long() == -1);
4975 match(ConL);
4976
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate: low 32-bit mask
4982 operand immL_32bits()
4983 %{
4984 predicate(n->get_long() == 0xFFFFFFFFL);
4985 match(ConL);
4986 op_cost(20);
4987
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Int Immediate: 2^n-1, positive
4993 operand immI_Pow2M1()
4994 %{
4995 predicate((n->get_int() > 0)
4996 && is_power_of_2((juint)n->get_int() + 1));
4997 match(ConI);
4998
4999 op_cost(20);
5000 format %{ %}
5001 interface(CONST_INTER);
5002 %}
5003
5004 // Float Immediate zero
5005 operand immF0()
5006 %{
5007 predicate(jint_cast(n->getf()) == 0);
5008 match(ConF);
5009
5010 op_cost(5);
5011 format %{ %}
5012 interface(CONST_INTER);
5013 %}
5014
5015 // Float Immediate
5016 operand immF()
5017 %{
5018 match(ConF);
5019
5020 op_cost(15);
5021 format %{ %}
5022 interface(CONST_INTER);
5023 %}
5024
5025 // Half Float Immediate
5026 operand immH()
5027 %{
5028 match(ConH);
5029
5030 op_cost(15);
5031 format %{ %}
5032 interface(CONST_INTER);
5033 %}
5034
5035 // Double Immediate zero
5036 operand immD0()
5037 %{
5038 predicate(jlong_cast(n->getd()) == 0);
5039 match(ConD);
5040
5041 op_cost(5);
5042 format %{ %}
5043 interface(CONST_INTER);
5044 %}
5045
5046 // Double Immediate
5047 operand immD()
5048 %{
5049 match(ConD);
5050
5051 op_cost(15);
5052 format %{ %}
5053 interface(CONST_INTER);
5054 %}
5055
5056 // Immediates for special shifts (sign extend)
5057
// Constants for shift distances
5059 operand immI_16()
5060 %{
5061 predicate(n->get_int() == 16);
5062 match(ConI);
5063
5064 format %{ %}
5065 interface(CONST_INTER);
5066 %}
5067
5068 operand immI_24()
5069 %{
5070 predicate(n->get_int() == 24);
5071 match(ConI);
5072
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Constant for byte-wide masking
5078 operand immI_255()
5079 %{
5080 predicate(n->get_int() == 255);
5081 match(ConI);
5082
5083 format %{ %}
5084 interface(CONST_INTER);
5085 %}
5086
5087 // Constant for short-wide masking
5088 operand immI_65535()
5089 %{
5090 predicate(n->get_int() == 65535);
5091 match(ConI);
5092
5093 format %{ %}
5094 interface(CONST_INTER);
5095 %}
5096
5097 // Constant for byte-wide masking
5098 operand immL_255()
5099 %{
5100 predicate(n->get_long() == 255);
5101 match(ConL);
5102
5103 format %{ %}
5104 interface(CONST_INTER);
5105 %}
5106
5107 // Constant for short-wide masking
5108 operand immL_65535()
5109 %{
5110 predicate(n->get_long() == 65535);
5111 match(ConL);
5112
5113 format %{ %}
5114 interface(CONST_INTER);
5115 %}
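
// The byte- and short-wide masking constants above allow "x & 0xFF" and
// "x & 0xFFFF" to be matched by zero-extending moves (movzbl/movzwl and
// friends) rather than explicit and instructions.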
5116
5117 operand kReg()
5118 %{
5119 constraint(ALLOC_IN_RC(vectmask_reg));
5120 match(RegVectMask);
5121 format %{%}
5122 interface(REG_INTER);
5123 %}
5124
5125 // Register Operands
5126 // Integer Register
5127 operand rRegI()
5128 %{
5129 constraint(ALLOC_IN_RC(int_reg));
5130 match(RegI);
5131
5132 match(rax_RegI);
5133 match(rbx_RegI);
5134 match(rcx_RegI);
5135 match(rdx_RegI);
5136 match(rdi_RegI);
5137
5138 format %{ %}
5139 interface(REG_INTER);
5140 %}
5141
5142 // Special Registers
5143 operand rax_RegI()
5144 %{
5145 constraint(ALLOC_IN_RC(int_rax_reg));
5146 match(RegI);
5147 match(rRegI);
5148
5149 format %{ "RAX" %}
5150 interface(REG_INTER);
5151 %}
5152
5153 // Special Registers
5154 operand rbx_RegI()
5155 %{
5156 constraint(ALLOC_IN_RC(int_rbx_reg));
5157 match(RegI);
5158 match(rRegI);
5159
5160 format %{ "RBX" %}
5161 interface(REG_INTER);
5162 %}
5163
5164 operand rcx_RegI()
5165 %{
5166 constraint(ALLOC_IN_RC(int_rcx_reg));
5167 match(RegI);
5168 match(rRegI);
5169
5170 format %{ "RCX" %}
5171 interface(REG_INTER);
5172 %}
5173
5174 operand rdx_RegI()
5175 %{
5176 constraint(ALLOC_IN_RC(int_rdx_reg));
5177 match(RegI);
5178 match(rRegI);
5179
5180 format %{ "RDX" %}
5181 interface(REG_INTER);
5182 %}
5183
5184 operand rdi_RegI()
5185 %{
5186 constraint(ALLOC_IN_RC(int_rdi_reg));
5187 match(RegI);
5188 match(rRegI);
5189
5190 format %{ "RDI" %}
5191 interface(REG_INTER);
5192 %}
5193
5194 operand no_rax_rdx_RegI()
5195 %{
5196 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5197 match(RegI);
5198 match(rbx_RegI);
5199 match(rcx_RegI);
5200 match(rdi_RegI);
5201
5202 format %{ %}
5203 interface(REG_INTER);
5204 %}
5205
5206 operand no_rbp_r13_RegI()
5207 %{
5208 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5209 match(RegI);
5210 match(rRegI);
5211 match(rax_RegI);
5212 match(rbx_RegI);
5213 match(rcx_RegI);
5214 match(rdx_RegI);
5215 match(rdi_RegI);
5216
5217 format %{ %}
5218 interface(REG_INTER);
5219 %}
5220
5221 // Pointer Register
5222 operand any_RegP()
5223 %{
5224 constraint(ALLOC_IN_RC(any_reg));
5225 match(RegP);
5226 match(rax_RegP);
5227 match(rbx_RegP);
5228 match(rdi_RegP);
5229 match(rsi_RegP);
5230 match(rbp_RegP);
5231 match(r15_RegP);
5232 match(rRegP);
5233
5234 format %{ %}
5235 interface(REG_INTER);
5236 %}
5237
5238 operand rRegP()
5239 %{
5240 constraint(ALLOC_IN_RC(ptr_reg));
5241 match(RegP);
5242 match(rax_RegP);
5243 match(rbx_RegP);
5244 match(rdi_RegP);
5245 match(rsi_RegP);
5246 match(rbp_RegP); // See Q&A below about
5247 match(r15_RegP); // r15_RegP and rbp_RegP.
5248
5249 format %{ %}
5250 interface(REG_INTER);
5251 %}
5252
5253 operand rRegN() %{
5254 constraint(ALLOC_IN_RC(int_reg));
5255 match(RegN);
5256
5257 format %{ %}
5258 interface(REG_INTER);
5259 %}
5260
5261 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5262 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator for the output.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer is true, RBP is used as a proper frame pointer and is
// not included in ptr_reg. As a result, RBP is not included in the
// instruction's output either.
5271
5272 // This operand is not allowed to use RBP even if
5273 // RBP is not used to hold the frame pointer.
5274 operand no_rbp_RegP()
5275 %{
5276 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5277 match(RegP);
5278 match(rbx_RegP);
5279 match(rsi_RegP);
5280 match(rdi_RegP);
5281
5282 format %{ %}
5283 interface(REG_INTER);
5284 %}
5285
5286 // Special Registers
5287 // Return a pointer value
5288 operand rax_RegP()
5289 %{
5290 constraint(ALLOC_IN_RC(ptr_rax_reg));
5291 match(RegP);
5292 match(rRegP);
5293
5294 format %{ %}
5295 interface(REG_INTER);
5296 %}
5297
5298 // Special Registers
5299 // Return a compressed pointer value
5300 operand rax_RegN()
5301 %{
5302 constraint(ALLOC_IN_RC(int_rax_reg));
5303 match(RegN);
5304 match(rRegN);
5305
5306 format %{ %}
5307 interface(REG_INTER);
5308 %}
5309
5310 // Used in AtomicAdd
5311 operand rbx_RegP()
5312 %{
5313 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5314 match(RegP);
5315 match(rRegP);
5316
5317 format %{ %}
5318 interface(REG_INTER);
5319 %}
5320
5321 operand rsi_RegP()
5322 %{
5323 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5324 match(RegP);
5325 match(rRegP);
5326
5327 format %{ %}
5328 interface(REG_INTER);
5329 %}
5330
5331 operand rbp_RegP()
5332 %{
5333 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5334 match(RegP);
5335 match(rRegP);
5336
5337 format %{ %}
5338 interface(REG_INTER);
5339 %}
5340
5341 // Used in rep stosq
5342 operand rdi_RegP()
5343 %{
5344 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5345 match(RegP);
5346 match(rRegP);
5347
5348 format %{ %}
5349 interface(REG_INTER);
5350 %}
5351
5352 operand r15_RegP()
5353 %{
5354 constraint(ALLOC_IN_RC(ptr_r15_reg));
5355 match(RegP);
5356 match(rRegP);
5357
5358 format %{ %}
5359 interface(REG_INTER);
5360 %}
5361
5362 operand rRegL()
5363 %{
5364 constraint(ALLOC_IN_RC(long_reg));
5365 match(RegL);
5366 match(rax_RegL);
5367 match(rdx_RegL);
5368
5369 format %{ %}
5370 interface(REG_INTER);
5371 %}
5372
5373 // Special Registers
5374 operand no_rax_rdx_RegL()
5375 %{
5376 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5377 match(RegL);
5378 match(rRegL);
5379
5380 format %{ %}
5381 interface(REG_INTER);
5382 %}
5383
5384 operand rax_RegL()
5385 %{
5386 constraint(ALLOC_IN_RC(long_rax_reg));
5387 match(RegL);
5388 match(rRegL);
5389
5390 format %{ "RAX" %}
5391 interface(REG_INTER);
5392 %}
5393
5394 operand rcx_RegL()
5395 %{
5396 constraint(ALLOC_IN_RC(long_rcx_reg));
5397 match(RegL);
5398 match(rRegL);
5399
5400 format %{ %}
5401 interface(REG_INTER);
5402 %}
5403
5404 operand rdx_RegL()
5405 %{
5406 constraint(ALLOC_IN_RC(long_rdx_reg));
5407 match(RegL);
5408 match(rRegL);
5409
5410 format %{ %}
5411 interface(REG_INTER);
5412 %}
5413
5414 operand r11_RegL()
5415 %{
5416 constraint(ALLOC_IN_RC(long_r11_reg));
5417 match(RegL);
5418 match(rRegL);
5419
5420 format %{ %}
5421 interface(REG_INTER);
5422 %}
5423
5424 operand no_rbp_r13_RegL()
5425 %{
5426 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5427 match(RegL);
5428 match(rRegL);
5429 match(rax_RegL);
5430 match(rcx_RegL);
5431 match(rdx_RegL);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 // Flags register, used as output of compare instructions
5438 operand rFlagsReg()
5439 %{
5440 constraint(ALLOC_IN_RC(int_flags));
5441 match(RegFlags);
5442
5443 format %{ "RFLAGS" %}
5444 interface(REG_INTER);
5445 %}
5446
5447 // Flags register, used as output of FLOATING POINT compare instructions
5448 operand rFlagsRegU()
5449 %{
5450 constraint(ALLOC_IN_RC(int_flags));
5451 match(RegFlags);
5452
5453 format %{ "RFLAGS_U" %}
5454 interface(REG_INTER);
5455 %}
5456
5457 operand rFlagsRegUCF() %{
5458 constraint(ALLOC_IN_RC(int_flags));
5459 match(RegFlags);
5460 predicate(false);
5461
5462 format %{ "RFLAGS_U_CF" %}
5463 interface(REG_INTER);
5464 %}
5465
5466 // Float register operands
5467 operand regF() %{
5468 constraint(ALLOC_IN_RC(float_reg));
5469 match(RegF);
5470
5471 format %{ %}
5472 interface(REG_INTER);
5473 %}
5474
5475 // Float register operands
5476 operand legRegF() %{
5477 constraint(ALLOC_IN_RC(float_reg_legacy));
5478 match(RegF);
5479
5480 format %{ %}
5481 interface(REG_INTER);
5482 %}
5483
5484 // Float register operands
5485 operand vlRegF() %{
5486 constraint(ALLOC_IN_RC(float_reg_vl));
5487 match(RegF);
5488
5489 format %{ %}
5490 interface(REG_INTER);
5491 %}
5492
5493 // Double register operands
5494 operand regD() %{
5495 constraint(ALLOC_IN_RC(double_reg));
5496 match(RegD);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 // Double register operands
5503 operand legRegD() %{
5504 constraint(ALLOC_IN_RC(double_reg_legacy));
5505 match(RegD);
5506
5507 format %{ %}
5508 interface(REG_INTER);
5509 %}
5510
5511 // Double register operands
5512 operand vlRegD() %{
5513 constraint(ALLOC_IN_RC(double_reg_vl));
5514 match(RegD);
5515
5516 format %{ %}
5517 interface(REG_INTER);
5518 %}
5519
5520 //----------Memory Operands----------------------------------------------------
5521 // Direct Memory Operand
5522 // operand direct(immP addr)
5523 // %{
5524 // match(addr);
5525
5526 // format %{ "[$addr]" %}
5527 // interface(MEMORY_INTER) %{
5528 // base(0xFFFFFFFF);
5529 // index(0x4);
5530 // scale(0x0);
5531 // disp($addr);
5532 // %}
5533 // %}
5534
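// In the MEMORY_INTER encodings below, index(0x4) means "no index register":
// encoding 4 is RSP, which the SIB byte cannot encode as an index, so that
// value doubles as the "none" marker.
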
5535 // Indirect Memory Operand
5536 operand indirect(any_RegP reg)
5537 %{
5538 constraint(ALLOC_IN_RC(ptr_reg));
5539 match(reg);
5540
5541 format %{ "[$reg]" %}
5542 interface(MEMORY_INTER) %{
5543 base($reg);
5544 index(0x4);
5545 scale(0x0);
5546 disp(0x0);
5547 %}
5548 %}
5549
5550 // Indirect Memory Plus Short Offset Operand
5551 operand indOffset8(any_RegP reg, immL8 off)
5552 %{
5553 constraint(ALLOC_IN_RC(ptr_reg));
5554 match(AddP reg off);
5555
5556 format %{ "[$reg + $off (8-bit)]" %}
5557 interface(MEMORY_INTER) %{
5558 base($reg);
5559 index(0x4);
5560 scale(0x0);
5561 disp($off);
5562 %}
5563 %}
5564
5565 // Indirect Memory Plus Long Offset Operand
5566 operand indOffset32(any_RegP reg, immL32 off)
5567 %{
5568 constraint(ALLOC_IN_RC(ptr_reg));
5569 match(AddP reg off);
5570
5571 format %{ "[$reg + $off (32-bit)]" %}
5572 interface(MEMORY_INTER) %{
5573 base($reg);
5574 index(0x4);
5575 scale(0x0);
5576 disp($off);
5577 %}
5578 %}
5579
5580 // Indirect Memory Plus Index Register Plus Offset Operand
5581 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5582 %{
5583 constraint(ALLOC_IN_RC(ptr_reg));
5584 match(AddP (AddP reg lreg) off);
5585
5586 op_cost(10);
5587 format %{"[$reg + $off + $lreg]" %}
5588 interface(MEMORY_INTER) %{
5589 base($reg);
5590 index($lreg);
5591 scale(0x0);
5592 disp($off);
5593 %}
5594 %}
5595
// Indirect Memory Plus Index Register Operand
5597 operand indIndex(any_RegP reg, rRegL lreg)
5598 %{
5599 constraint(ALLOC_IN_RC(ptr_reg));
5600 match(AddP reg lreg);
5601
5602 op_cost(10);
5603 format %{"[$reg + $lreg]" %}
5604 interface(MEMORY_INTER) %{
5605 base($reg);
5606 index($lreg);
5607 scale(0x0);
5608 disp(0x0);
5609 %}
5610 %}
5611
5612 // Indirect Memory Times Scale Plus Index Register
5613 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5614 %{
5615 constraint(ALLOC_IN_RC(ptr_reg));
5616 match(AddP reg (LShiftL lreg scale));
5617
5618 op_cost(10);
5619 format %{"[$reg + $lreg << $scale]" %}
5620 interface(MEMORY_INTER) %{
5621 base($reg);
5622 index($lreg);
5623 scale($scale);
5624 disp(0x0);
5625 %}
5626 %}
5627
5628 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5629 %{
5630 constraint(ALLOC_IN_RC(ptr_reg));
5631 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5632 match(AddP reg (LShiftL (ConvI2L idx) scale));
5633
5634 op_cost(10);
5635 format %{"[$reg + pos $idx << $scale]" %}
5636 interface(MEMORY_INTER) %{
5637 base($reg);
5638 index($idx);
5639 scale($scale);
5640 disp(0x0);
5641 %}
5642 %}
5643
5644 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5645 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5646 %{
5647 constraint(ALLOC_IN_RC(ptr_reg));
5648 match(AddP (AddP reg (LShiftL lreg scale)) off);
5649
5650 op_cost(10);
5651 format %{"[$reg + $off + $lreg << $scale]" %}
5652 interface(MEMORY_INTER) %{
5653 base($reg);
5654 index($lreg);
5655 scale($scale);
5656 disp($off);
5657 %}
5658 %}
5659
5660 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5661 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5662 %{
5663 constraint(ALLOC_IN_RC(ptr_reg));
5664 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5665 match(AddP (AddP reg (ConvI2L idx)) off);
5666
5667 op_cost(10);
5668 format %{"[$reg + $off + $idx]" %}
5669 interface(MEMORY_INTER) %{
5670 base($reg);
5671 index($idx);
5672 scale(0x0);
5673 disp($off);
5674 %}
5675 %}
5676
5677 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5678 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5679 %{
5680 constraint(ALLOC_IN_RC(ptr_reg));
5681 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5682 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5683
5684 op_cost(10);
5685 format %{"[$reg + $off + $idx << $scale]" %}
5686 interface(MEMORY_INTER) %{
5687 base($reg);
5688 index($idx);
5689 scale($scale);
5690 disp($off);
5691 %}
5692 %}
5693
5694 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even with
// CompressedOops::base() == nullptr.
5697 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5698 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5699 constraint(ALLOC_IN_RC(ptr_reg));
5700 match(AddP (DecodeN reg) off);
5701
5702 op_cost(10);
5703 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5704 interface(MEMORY_INTER) %{
5705 base(0xc); // R12
5706 index($reg);
5707 scale(0x3);
5708 disp($off);
5709 %}
5710 %}
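
// (R12 holds the compressed-oop heap base, so this folds the DecodeN
// computation (oop = r12 + (narrow_oop << 3)) and the field offset into a
// single addressing mode.)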
5711
5712 // Indirect Memory Operand
5713 operand indirectNarrow(rRegN reg)
5714 %{
5715 predicate(CompressedOops::shift() == 0);
5716 constraint(ALLOC_IN_RC(ptr_reg));
5717 match(DecodeN reg);
5718
5719 format %{ "[$reg]" %}
5720 interface(MEMORY_INTER) %{
5721 base($reg);
5722 index(0x4);
5723 scale(0x0);
5724 disp(0x0);
5725 %}
5726 %}
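
// (A shift of zero implies an unscaled, zero-based compressed heap, so the
// narrow oop bit pattern equals the raw 64-bit address and can be used
// directly as a base register.)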
5727
5728 // Indirect Memory Plus Short Offset Operand
5729 operand indOffset8Narrow(rRegN reg, immL8 off)
5730 %{
5731 predicate(CompressedOops::shift() == 0);
5732 constraint(ALLOC_IN_RC(ptr_reg));
5733 match(AddP (DecodeN reg) off);
5734
5735 format %{ "[$reg + $off (8-bit)]" %}
5736 interface(MEMORY_INTER) %{
5737 base($reg);
5738 index(0x4);
5739 scale(0x0);
5740 disp($off);
5741 %}
5742 %}
5743
5744 // Indirect Memory Plus Long Offset Operand
5745 operand indOffset32Narrow(rRegN reg, immL32 off)
5746 %{
5747 predicate(CompressedOops::shift() == 0);
5748 constraint(ALLOC_IN_RC(ptr_reg));
5749 match(AddP (DecodeN reg) off);
5750
5751 format %{ "[$reg + $off (32-bit)]" %}
5752 interface(MEMORY_INTER) %{
5753 base($reg);
5754 index(0x4);
5755 scale(0x0);
5756 disp($off);
5757 %}
5758 %}
5759
5760 // Indirect Memory Plus Index Register Plus Offset Operand
5761 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5762 %{
5763 predicate(CompressedOops::shift() == 0);
5764 constraint(ALLOC_IN_RC(ptr_reg));
5765 match(AddP (AddP (DecodeN reg) lreg) off);
5766
5767 op_cost(10);
5768 format %{"[$reg + $off + $lreg]" %}
5769 interface(MEMORY_INTER) %{
5770 base($reg);
5771 index($lreg);
5772 scale(0x0);
5773 disp($off);
5774 %}
5775 %}
5776
// Indirect Memory Plus Index Register Operand
5778 operand indIndexNarrow(rRegN reg, rRegL lreg)
5779 %{
5780 predicate(CompressedOops::shift() == 0);
5781 constraint(ALLOC_IN_RC(ptr_reg));
5782 match(AddP (DecodeN reg) lreg);
5783
5784 op_cost(10);
5785 format %{"[$reg + $lreg]" %}
5786 interface(MEMORY_INTER) %{
5787 base($reg);
5788 index($lreg);
5789 scale(0x0);
5790 disp(0x0);
5791 %}
5792 %}
5793
5794 // Indirect Memory Times Scale Plus Index Register
5795 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5796 %{
5797 predicate(CompressedOops::shift() == 0);
5798 constraint(ALLOC_IN_RC(ptr_reg));
5799 match(AddP (DecodeN reg) (LShiftL lreg scale));
5800
5801 op_cost(10);
5802 format %{"[$reg + $lreg << $scale]" %}
5803 interface(MEMORY_INTER) %{
5804 base($reg);
5805 index($lreg);
5806 scale($scale);
5807 disp(0x0);
5808 %}
5809 %}
5810
5811 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5812 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5813 %{
5814 predicate(CompressedOops::shift() == 0);
5815 constraint(ALLOC_IN_RC(ptr_reg));
5816 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5817
5818 op_cost(10);
5819 format %{"[$reg + $off + $lreg << $scale]" %}
5820 interface(MEMORY_INTER) %{
5821 base($reg);
5822 index($lreg);
5823 scale($scale);
5824 disp($off);
5825 %}
5826 %}
5827
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5829 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5830 %{
5831 constraint(ALLOC_IN_RC(ptr_reg));
5832 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5833 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5834
5835 op_cost(10);
5836 format %{"[$reg + $off + $idx]" %}
5837 interface(MEMORY_INTER) %{
5838 base($reg);
5839 index($idx);
5840 scale(0x0);
5841 disp($off);
5842 %}
5843 %}
5844
5845 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5846 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5847 %{
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5850 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5851
5852 op_cost(10);
5853 format %{"[$reg + $off + $idx << $scale]" %}
5854 interface(MEMORY_INTER) %{
5855 base($reg);
5856 index($idx);
5857 scale($scale);
5858 disp($off);
5859 %}
5860 %}
5861
5862 //----------Special Memory Operands--------------------------------------------
5863 // Stack Slot Operand - This operand is used for loading and storing temporary
5864 // values on the stack where a match requires a value to
5865 // flow through memory.
5866 operand stackSlotP(sRegP reg)
5867 %{
5868 constraint(ALLOC_IN_RC(stack_slots));
5869 // No match rule because this operand is only generated in matching
5870
5871 format %{ "[$reg]" %}
5872 interface(MEMORY_INTER) %{
5873 base(0x4); // RSP
5874 index(0x4); // No Index
5875 scale(0x0); // No Scale
5876 disp($reg); // Stack Offset
5877 %}
5878 %}
5879
5880 operand stackSlotI(sRegI reg)
5881 %{
5882 constraint(ALLOC_IN_RC(stack_slots));
5883 // No match rule because this operand is only generated in matching
5884
5885 format %{ "[$reg]" %}
5886 interface(MEMORY_INTER) %{
5887 base(0x4); // RSP
5888 index(0x4); // No Index
5889 scale(0x0); // No Scale
5890 disp($reg); // Stack Offset
5891 %}
5892 %}
5893
5894 operand stackSlotF(sRegF reg)
5895 %{
5896 constraint(ALLOC_IN_RC(stack_slots));
5897 // No match rule because this operand is only generated in matching
5898
5899 format %{ "[$reg]" %}
5900 interface(MEMORY_INTER) %{
5901 base(0x4); // RSP
5902 index(0x4); // No Index
5903 scale(0x0); // No Scale
5904 disp($reg); // Stack Offset
5905 %}
5906 %}
5907
5908 operand stackSlotD(sRegD reg)
5909 %{
5910 constraint(ALLOC_IN_RC(stack_slots));
5911 // No match rule because this operand is only generated in matching
5912
5913 format %{ "[$reg]" %}
5914 interface(MEMORY_INTER) %{
5915 base(0x4); // RSP
5916 index(0x4); // No Index
5917 scale(0x0); // No Scale
5918 disp($reg); // Stack Offset
5919 %}
5920 %}
5921 operand stackSlotL(sRegL reg)
5922 %{
5923 constraint(ALLOC_IN_RC(stack_slots));
5924 // No match rule because this operand is only generated in matching
5925
5926 format %{ "[$reg]" %}
5927 interface(MEMORY_INTER) %{
5928 base(0x4); // RSP
5929 index(0x4); // No Index
5930 scale(0x0); // No Scale
5931 disp($reg); // Stack Offset
5932 %}
5933 %}
5934
5935 //----------Conditional Branch Operands----------------------------------------
5936 // Comparison Op - This is the operation of the comparison, and is limited to
5937 // the following set of codes:
5938 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5939 //
5940 // Other attributes of the comparison, such as unsignedness, are specified
5941 // by the comparison instruction that sets a condition code flags register.
5942 // That result is represented by a flags operand whose subtype is appropriate
5943 // to the unsignedness (etc.) of the comparison.
5944 //
5945 // Later, the instruction which matches both the Comparison Op (a Bool) and
5946 // the flags (produced by the Cmp) specifies the coding of the comparison op
5947 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5948
5949 // Comparison Code
5950 operand cmpOp()
5951 %{
5952 match(Bool);
5953
5954 format %{ "" %}
5955 interface(COND_INTER) %{
5956 equal(0x4, "e");
5957 not_equal(0x5, "ne");
5958 less(0xC, "l");
5959 greater_equal(0xD, "ge");
5960 less_equal(0xE, "le");
5961 greater(0xF, "g");
5962 overflow(0x0, "o");
5963 no_overflow(0x1, "no");
5964 %}
5965 %}
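
// The hex values above are the x86 condition-code nibbles combined with the
// Jcc/SETcc/CMOVcc opcodes; e.g. equal (0x4) yields JE, encoded 0x74 in the
// short form and 0x0F 0x84 in the near form.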
5966
5967 // Comparison Code, unsigned compare. Used by FP also, with
5968 // C2 (unordered) turned into GT or LT already. The other bits
5969 // C0 and C3 are turned into Carry & Zero flags.
5970 operand cmpOpU()
5971 %{
5972 match(Bool);
5973
5974 format %{ "" %}
5975 interface(COND_INTER) %{
5976 equal(0x4, "e");
5977 not_equal(0x5, "ne");
5978 less(0x2, "b");
5979 greater_equal(0x3, "ae");
5980 less_equal(0x6, "be");
5981 greater(0x7, "a");
5982 overflow(0x0, "o");
5983 no_overflow(0x1, "no");
5984 %}
5985 %}
5986
5987
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
5991 operand cmpOpUCF() %{
5992 match(Bool);
5993 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5994 n->as_Bool()->_test._test == BoolTest::ge ||
5995 n->as_Bool()->_test._test == BoolTest::le ||
5996 n->as_Bool()->_test._test == BoolTest::gt ||
5997 n->in(1)->in(1) == n->in(1)->in(2));
5998 format %{ "" %}
5999 interface(COND_INTER) %{
6000 equal(0xb, "np");
6001 not_equal(0xa, "p");
6002 less(0x2, "b");
6003 greater_equal(0x3, "ae");
6004 less_equal(0x6, "be");
6005 greater(0x7, "a");
6006 overflow(0x0, "o");
6007 no_overflow(0x1, "no");
6008 %}
6009 %}
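
// (ucomiss/ucomisd set ZF, PF, and CF all to 1 on an unordered result. A
// value compared with itself is either "equal" or "unordered", so ZF is
// always 1 and eq/ne reduce to the parity checks "np"/"p" above.)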
6010
6011
6012 // Floating comparisons that can be fixed up with extra conditional jumps
6013 operand cmpOpUCF2() %{
6014 match(Bool);
6015 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6016 n->as_Bool()->_test._test == BoolTest::eq) &&
6017 n->in(1)->in(1) != n->in(1)->in(2));
6018 format %{ "" %}
6019 interface(COND_INTER) %{
6020 equal(0x4, "e");
6021 not_equal(0x5, "ne");
6022 less(0x2, "b");
6023 greater_equal(0x3, "ae");
6024 less_equal(0x6, "be");
6025 greater(0x7, "a");
6026 overflow(0x0, "o");
6027 no_overflow(0x1, "no");
6028 %}
6029 %}
6030
// Operands for bound floating-point register arguments
6032 operand rxmm0() %{
6033 constraint(ALLOC_IN_RC(xmm0_reg));
6034 match(VecX);
  format %{ %}
6036 interface(REG_INTER);
6037 %}
6038
6039 // Vectors
6040
6041 // Dummy generic vector class. Should be used for all vector operands.
6042 // Replaced with vec[SDXYZ] during post-selection pass.
6043 operand vec() %{
6044 constraint(ALLOC_IN_RC(dynamic));
6045 match(VecX);
6046 match(VecY);
6047 match(VecZ);
6048 match(VecS);
6049 match(VecD);
6050
6051 format %{ %}
6052 interface(REG_INTER);
6053 %}
6054
6055 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6056 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6057 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6058 // runtime code generation via reg_class_dynamic.
6059 operand legVec() %{
6060 constraint(ALLOC_IN_RC(dynamic));
6061 match(VecX);
6062 match(VecY);
6063 match(VecZ);
6064 match(VecS);
6065 match(VecD);
6066
6067 format %{ %}
6068 interface(REG_INTER);
6069 %}
6070
6071 // Replaces vec during post-selection cleanup. See above.
6072 operand vecS() %{
6073 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6074 match(VecS);
6075
6076 format %{ %}
6077 interface(REG_INTER);
6078 %}
6079
6080 // Replaces legVec during post-selection cleanup. See above.
6081 operand legVecS() %{
6082 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6083 match(VecS);
6084
6085 format %{ %}
6086 interface(REG_INTER);
6087 %}
6088
6089 // Replaces vec during post-selection cleanup. See above.
6090 operand vecD() %{
6091 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6092 match(VecD);
6093
6094 format %{ %}
6095 interface(REG_INTER);
6096 %}
6097
6098 // Replaces legVec during post-selection cleanup. See above.
6099 operand legVecD() %{
6100 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6101 match(VecD);
6102
6103 format %{ %}
6104 interface(REG_INTER);
6105 %}
6106
6107 // Replaces vec during post-selection cleanup. See above.
6108 operand vecX() %{
6109 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6110 match(VecX);
6111
6112 format %{ %}
6113 interface(REG_INTER);
6114 %}
6115
6116 // Replaces legVec during post-selection cleanup. See above.
6117 operand legVecX() %{
6118 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6119 match(VecX);
6120
6121 format %{ %}
6122 interface(REG_INTER);
6123 %}
6124
6125 // Replaces vec during post-selection cleanup. See above.
6126 operand vecY() %{
6127 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6128 match(VecY);
6129
6130 format %{ %}
6131 interface(REG_INTER);
6132 %}
6133
6134 // Replaces legVec during post-selection cleanup. See above.
6135 operand legVecY() %{
6136 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6137 match(VecY);
6138
6139 format %{ %}
6140 interface(REG_INTER);
6141 %}
6142
6143 // Replaces vec during post-selection cleanup. See above.
6144 operand vecZ() %{
6145 constraint(ALLOC_IN_RC(vectorz_reg));
6146 match(VecZ);
6147
6148 format %{ %}
6149 interface(REG_INTER);
6150 %}
6151
6152 // Replaces legVec during post-selection cleanup. See above.
6153 operand legVecZ() %{
6154 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6155 match(VecZ);
6156
6157 format %{ %}
6158 interface(REG_INTER);
6159 %}
6160
6161 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6163 // instruction definitions by not requiring the AD writer to specify separate
6164 // instructions for every form of operand when the instruction accepts
6165 // multiple operand types with the same basic encoding and format. The classic
6166 // case of this is memory operands.
6167
6168 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6169 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6170 indCompressedOopOffset,
6171 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6172 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6173 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
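
// A single instruct rule written against the "memory" opclass thus covers
// all of the addressing forms above with one definition.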
6174
6175 //----------PIPELINE-----------------------------------------------------------
6176 // Rules which define the behavior of the target architectures pipeline.
6177 pipeline %{
6178
6179 //----------ATTRIBUTES---------------------------------------------------------
6180 attributes %{
  variable_size_instructions; // Variable-size instructions
  max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
  instruction_unit_size = 1; // An instruction is 1 byte long
6184 instruction_fetch_unit_size = 16; // The processor fetches one line
6185 instruction_fetch_units = 1; // of 16 bytes
6186 %}
6187
6188 //----------RESOURCES----------------------------------------------------------
6189 // Resources are the functional units available to the machine
6190
6191 // Generic P2/P3 pipeline
6192 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6193 // 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
6196 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6197 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6198 BR, FPU,
6199 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6200
6201 //----------PIPELINE DESCRIPTION-----------------------------------------------
6202 // Pipeline Description specifies the stages in the machine's pipeline
6203
6204 // Generic P2/P3 pipeline
6205 pipe_desc(S0, S1, S2, S3, S4, S5);
6206
6207 //----------PIPELINE CLASSES---------------------------------------------------
6208 // Pipeline Classes describe the stages in which input and output are
6209 // referenced by the hardware pipeline.
6210
6211 // Naming convention: ialu or fpu
6212 // Then: _reg
6213 // Then: _reg if there is a 2nd register
6214 // Then: _long if it's a pair of instructions implementing a long
6215 // Then: _fat if it requires the big decoder
6216 // Or: _mem if it requires the big decoder and a memory unit.
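
// For example, in ialu_reg below, "dst : S4(write)" declares that dst is
// written in stage S4 (and read in S3), while "DECODE : S0" and "ALU : S3"
// reserve any decode unit in stage S0 and any ALU in stage S3.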
6217
6218 // Integer ALU reg operation
6219 pipe_class ialu_reg(rRegI dst)
6220 %{
6221 single_instruction;
6222 dst : S4(write);
6223 dst : S3(read);
6224 DECODE : S0; // any decoder
6225 ALU : S3; // any alu
6226 %}
6227
6228 // Long ALU reg operation
6229 pipe_class ialu_reg_long(rRegL dst)
6230 %{
6231 instruction_count(2);
6232 dst : S4(write);
6233 dst : S3(read);
6234 DECODE : S0(2); // any 2 decoders
6235 ALU : S3(2); // both alus
6236 %}
6237
6238 // Integer ALU reg operation using big decoder
6239 pipe_class ialu_reg_fat(rRegI dst)
6240 %{
6241 single_instruction;
6242 dst : S4(write);
6243 dst : S3(read);
6244 D0 : S0; // big decoder only
6245 ALU : S3; // any alu
6246 %}
6247
6248 // Integer ALU reg-reg operation
6249 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6250 %{
6251 single_instruction;
6252 dst : S4(write);
6253 src : S3(read);
6254 DECODE : S0; // any decoder
6255 ALU : S3; // any alu
6256 %}
6257
6258 // Integer ALU reg-reg operation
6259 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6260 %{
6261 single_instruction;
6262 dst : S4(write);
6263 src : S3(read);
6264 D0 : S0; // big decoder only
6265 ALU : S3; // any alu
6266 %}
6267
6268 // Integer ALU reg-mem operation
6269 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6270 %{
6271 single_instruction;
6272 dst : S5(write);
6273 mem : S3(read);
6274 D0 : S0; // big decoder only
6275 ALU : S4; // any alu
6276 MEM : S3; // any mem
6277 %}
6278
6279 // Integer mem operation (prefetch)
6280 pipe_class ialu_mem(memory mem)
6281 %{
6282 single_instruction;
6283 mem : S3(read);
6284 D0 : S0; // big decoder only
6285 MEM : S3; // any mem
6286 %}
6287
6288 // Integer Store to Memory
6289 pipe_class ialu_mem_reg(memory mem, rRegI src)
6290 %{
6291 single_instruction;
6292 mem : S3(read);
6293 src : S5(read);
6294 D0 : S0; // big decoder only
6295 ALU : S4; // any alu
6296 MEM : S3;
6297 %}
6298
6299 // // Long Store to Memory
6300 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6301 // %{
6302 // instruction_count(2);
6303 // mem : S3(read);
6304 // src : S5(read);
6305 // D0 : S0(2); // big decoder only; twice
6306 // ALU : S4(2); // any 2 alus
6307 // MEM : S3(2); // Both mems
6308 // %}
6309
6310 // Integer Store to Memory
6311 pipe_class ialu_mem_imm(memory mem)
6312 %{
6313 single_instruction;
6314 mem : S3(read);
6315 D0 : S0; // big decoder only
6316 ALU : S4; // any alu
6317 MEM : S3;
6318 %}
6319
6320 // Integer ALU0 reg-reg operation
6321 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6322 %{
6323 single_instruction;
6324 dst : S4(write);
6325 src : S3(read);
6326 D0 : S0; // Big decoder only
6327 ALU0 : S3; // only alu0
6328 %}
6329
6330 // Integer ALU0 reg-mem operation
6331 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6332 %{
6333 single_instruction;
6334 dst : S5(write);
6335 mem : S3(read);
6336 D0 : S0; // big decoder only
6337 ALU0 : S4; // ALU0 only
6338 MEM : S3; // any mem
6339 %}
6340
6341 // Integer ALU reg-reg operation
6342 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6343 %{
6344 single_instruction;
6345 cr : S4(write);
6346 src1 : S3(read);
6347 src2 : S3(read);
6348 DECODE : S0; // any decoder
6349 ALU : S3; // any alu
6350 %}
6351
6352 // Integer ALU reg-imm operation
6353 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6354 %{
6355 single_instruction;
6356 cr : S4(write);
6357 src1 : S3(read);
6358 DECODE : S0; // any decoder
6359 ALU : S3; // any alu
6360 %}
6361
6362 // Integer ALU reg-mem operation
6363 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6364 %{
6365 single_instruction;
6366 cr : S4(write);
6367 src1 : S3(read);
6368 src2 : S3(read);
6369 D0 : S0; // big decoder only
6370 ALU : S4; // any alu
6371 MEM : S3;
6372 %}
6373
6374 // Conditional move reg-reg
6375 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6376 %{
6377 instruction_count(4);
6378 y : S4(read);
6379 q : S3(read);
6380 p : S3(read);
6381 DECODE : S0(4); // any decoder
6382 %}
6383
6384 // Conditional move reg-reg
6385 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6386 %{
6387 single_instruction;
6388 dst : S4(write);
6389 src : S3(read);
6390 cr : S3(read);
6391 DECODE : S0; // any decoder
6392 %}
6393
6394 // Conditional move reg-mem
6395 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6396 %{
6397 single_instruction;
6398 dst : S4(write);
6399 src : S3(read);
6400 cr : S3(read);
6401 DECODE : S0; // any decoder
6402 MEM : S3;
6403 %}
6404
6405 // Conditional move reg-reg long
6406 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6407 %{
6408 single_instruction;
6409 dst : S4(write);
6410 src : S3(read);
6411 cr : S3(read);
6412 DECODE : S0(2); // any 2 decoders
6413 %}
6414
6415 // Float reg-reg operation
6416 pipe_class fpu_reg(regD dst)
6417 %{
6418 instruction_count(2);
6419 dst : S3(read);
6420 DECODE : S0(2); // any 2 decoders
6421 FPU : S3;
6422 %}
6423
6424 // Float reg-reg operation
6425 pipe_class fpu_reg_reg(regD dst, regD src)
6426 %{
6427 instruction_count(2);
6428 dst : S4(write);
6429 src : S3(read);
6430 DECODE : S0(2); // any 2 decoders
6431 FPU : S3;
6432 %}
6433
6434 // Float reg-reg operation
6435 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6436 %{
6437 instruction_count(3);
6438 dst : S4(write);
6439 src1 : S3(read);
6440 src2 : S3(read);
6441 DECODE : S0(3); // any 3 decoders
6442 FPU : S3(2);
6443 %}
6444
6445 // Float reg-reg operation
6446 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6447 %{
6448 instruction_count(4);
6449 dst : S4(write);
6450 src1 : S3(read);
6451 src2 : S3(read);
6452 src3 : S3(read);
  DECODE : S0(4); // any 4 decode slots
6454 FPU : S3(2);
6455 %}
6456
6457 // Float reg-reg operation
6458 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6459 %{
6460 instruction_count(4);
6461 dst : S4(write);
6462 src1 : S3(read);
6463 src2 : S3(read);
6464 src3 : S3(read);
6465 DECODE : S1(3); // any 3 decoders
6466 D0 : S0; // Big decoder only
6467 FPU : S3(2);
6468 MEM : S3;
6469 %}
6470
6471 // Float reg-mem operation
6472 pipe_class fpu_reg_mem(regD dst, memory mem)
6473 %{
6474 instruction_count(2);
6475 dst : S5(write);
6476 mem : S3(read);
6477 D0 : S0; // big decoder only
6478 DECODE : S1; // any decoder for FPU POP
6479 FPU : S4;
6480 MEM : S3; // any mem
6481 %}
6482
6483 // Float reg-mem operation
6484 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6485 %{
6486 instruction_count(3);
6487 dst : S5(write);
6488 src1 : S3(read);
6489 mem : S3(read);
6490 D0 : S0; // big decoder only
6491 DECODE : S1(2); // any decoder for FPU POP
6492 FPU : S4;
6493 MEM : S3; // any mem
6494 %}
6495
6496 // Float mem-reg operation
6497 pipe_class fpu_mem_reg(memory mem, regD src)
6498 %{
6499 instruction_count(2);
6500 src : S5(read);
6501 mem : S3(read);
6502 DECODE : S0; // any decoder for FPU PUSH
6503 D0 : S1; // big decoder only
6504 FPU : S4;
6505 MEM : S3; // any mem
6506 %}
6507
6508 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6509 %{
6510 instruction_count(3);
6511 src1 : S3(read);
6512 src2 : S3(read);
6513 mem : S3(read);
6514 DECODE : S0(2); // any decoder for FPU PUSH
6515 D0 : S1; // big decoder only
6516 FPU : S4;
6517 MEM : S3; // any mem
6518 %}
6519
6520 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6521 %{
6522 instruction_count(3);
6523 src1 : S3(read);
6524 src2 : S3(read);
6525 mem : S4(read);
6526 DECODE : S0; // any decoder for FPU PUSH
6527 D0 : S0(2); // big decoder only
6528 FPU : S4;
6529 MEM : S3(2); // any mem
6530 %}
6531
6532 pipe_class fpu_mem_mem(memory dst, memory src1)
6533 %{
6534 instruction_count(2);
6535 src1 : S3(read);
6536 dst : S4(read);
6537 D0 : S0(2); // big decoder only
6538 MEM : S3(2); // any mem
6539 %}
6540
6541 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6542 %{
6543 instruction_count(3);
6544 src1 : S3(read);
6545 src2 : S3(read);
6546 dst : S4(read);
6547 D0 : S0(3); // big decoder only
6548 FPU : S4;
6549 MEM : S3(3); // any mem
6550 %}
6551
6552 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6553 %{
6554 instruction_count(3);
6555 src1 : S4(read);
6556 mem : S4(read);
6557 DECODE : S0; // any decoder for FPU PUSH
6558 D0 : S0(2); // big decoder only
6559 FPU : S4;
6560 MEM : S3(2); // any mem
6561 %}
6562
6563 // Float load constant
6564 pipe_class fpu_reg_con(regD dst)
6565 %{
6566 instruction_count(2);
6567 dst : S5(write);
6568 D0 : S0; // big decoder only for the load
6569 DECODE : S1; // any decoder for FPU POP
6570 FPU : S4;
6571 MEM : S3; // any mem
6572 %}
6573
6574 // Float load constant
6575 pipe_class fpu_reg_reg_con(regD dst, regD src)
6576 %{
6577 instruction_count(3);
6578 dst : S5(write);
6579 src : S3(read);
6580 D0 : S0; // big decoder only for the load
6581 DECODE : S1(2); // any decoder for FPU POP
6582 FPU : S4;
6583 MEM : S3; // any mem
6584 %}
6585
// Unconditional branch
6587 pipe_class pipe_jmp(label labl)
6588 %{
6589 single_instruction;
6590 BR : S3;
6591 %}
6592
6593 // Conditional branch
6594 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6595 %{
6596 single_instruction;
6597 cr : S1(read);
6598 BR : S3;
6599 %}
6600
6601 // Allocation idiom
6602 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6603 %{
6604 instruction_count(1); force_serialization;
6605 fixed_latency(6);
6606 heap_ptr : S3(read);
6607 DECODE : S0(3);
6608 D0 : S2;
6609 MEM : S3;
6610 ALU : S3(2);
6611 dst : S5(write);
6612 BR : S5;
6613 %}
6614
6615 // Generic big/slow expanded idiom
6616 pipe_class pipe_slow()
6617 %{
6618 instruction_count(10); multiple_bundles; force_serialization;
6619 fixed_latency(100);
6620 D0 : S0(2);
6621 MEM : S3(2);
6622 %}
6623
6624 // The real do-nothing guy
6625 pipe_class empty()
6626 %{
6627 instruction_count(0);
6628 %}
6629
6630 // Define the class for the Nop node
6631 define
6632 %{
6633 MachNop = empty;
6634 %}
6635
6636 %}
6637
6638 //----------INSTRUCTIONS-------------------------------------------------------
6639 //
6640 // match -- States which machine-independent subtree may be replaced
6641 // by this instruction.
6642 // ins_cost -- The estimated cost of this instruction is used by instruction
6643 // selection to identify a minimum cost tree of machine
6644 // instructions that matches a tree of machine-independent
6645 // instructions.
6646 // format -- A string providing the disassembly for this instruction.
6647 // The value of an instruction's operand may be inserted
6648 // by referring to it with a '$' prefix.
6649 // opcode -- Three instruction opcodes may be provided. These are referred
6650 // to within an encode class as $primary, $secondary, and $tertiary
//              respectively. The primary opcode is commonly used to
6652 // indicate the type of machine instruction, while secondary
6653 // and tertiary are often used for prefix options or addressing
6654 // modes.
6655 // ins_encode -- A list of encode classes with parameters. The encode class
6656 // name must have been defined in an 'enc_class' specification
6657 // in the encode section of the architecture description.
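
// As an illustration of how these pieces fit together, a representative
// (simplified, for exposition only) entry for a register-register integer
// add would look roughly like this:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));  // ideal subtree this replaces
//     effect(KILL cr);                // integer ALU ops clobber the flags
//     ins_cost(150);                  // estimated cost for selection
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}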
6658
6659 // ============================================================================
6660
6661 instruct ShouldNotReachHere() %{
6662 match(Halt);
6663 format %{ "stop\t# ShouldNotReachHere" %}
6664 ins_encode %{
6665 if (is_reachable()) {
6666 const char* str = __ code_string(_halt_reason);
6667 __ stop(str);
6668 }
6669 %}
6670 ins_pipe(pipe_slow);
6671 %}
6672
6673 // ============================================================================
6674
6675 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6676 // Load Float
6677 instruct MoveF2VL(vlRegF dst, regF src) %{
6678 match(Set dst src);
6679 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6680 ins_encode %{
6681 ShouldNotReachHere();
6682 %}
6683 ins_pipe( fpu_reg_reg );
6684 %}
6685
6686 // Load Float
6687 instruct MoveF2LEG(legRegF dst, regF src) %{
6688 match(Set dst src);
6689 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6690 ins_encode %{
6691 ShouldNotReachHere();
6692 %}
6693 ins_pipe( fpu_reg_reg );
6694 %}
6695
6696 // Load Float
6697 instruct MoveVL2F(regF dst, vlRegF src) %{
6698 match(Set dst src);
6699 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6700 ins_encode %{
6701 ShouldNotReachHere();
6702 %}
6703 ins_pipe( fpu_reg_reg );
6704 %}
6705
6706 // Load Float
6707 instruct MoveLEG2F(regF dst, legRegF src) %{
6708 match(Set dst src);
6709 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6710 ins_encode %{
6711 ShouldNotReachHere();
6712 %}
6713 ins_pipe( fpu_reg_reg );
6714 %}
6715
6716 // Load Double
6717 instruct MoveD2VL(vlRegD dst, regD src) %{
6718 match(Set dst src);
6719 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6720 ins_encode %{
6721 ShouldNotReachHere();
6722 %}
6723 ins_pipe( fpu_reg_reg );
6724 %}
6725
6726 // Load Double
6727 instruct MoveD2LEG(legRegD dst, regD src) %{
6728 match(Set dst src);
6729 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6730 ins_encode %{
6731 ShouldNotReachHere();
6732 %}
6733 ins_pipe( fpu_reg_reg );
6734 %}
6735
6736 // Load Double
6737 instruct MoveVL2D(regD dst, vlRegD src) %{
6738 match(Set dst src);
6739 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6740 ins_encode %{
6741 ShouldNotReachHere();
6742 %}
6743 ins_pipe( fpu_reg_reg );
6744 %}
6745
6746 // Load Double
6747 instruct MoveLEG2D(regD dst, legRegD src) %{
6748 match(Set dst src);
6749 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6750 ins_encode %{
6751 ShouldNotReachHere();
6752 %}
6753 ins_pipe( fpu_reg_reg );
6754 %}
6755
6756 //----------Load/Store/Move Instructions---------------------------------------
6757 //----------Load Instructions--------------------------------------------------
6758
6759 // Load Byte (8 bit signed)
6760 instruct loadB(rRegI dst, memory mem)
6761 %{
6762 match(Set dst (LoadB mem));
6763
6764 ins_cost(125);
6765 format %{ "movsbl $dst, $mem\t# byte" %}
6766
6767 ins_encode %{
6768 __ movsbl($dst$$Register, $mem$$Address);
6769 %}
6770
6771 ins_pipe(ialu_reg_mem);
6772 %}
6773
6774 // Load Byte (8 bit signed) into Long Register
6775 instruct loadB2L(rRegL dst, memory mem)
6776 %{
6777 match(Set dst (ConvI2L (LoadB mem)));
6778
6779 ins_cost(125);
6780 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6781
6782 ins_encode %{
6783 __ movsbq($dst$$Register, $mem$$Address);
6784 %}
6785
6786 ins_pipe(ialu_reg_mem);
6787 %}
6788
6789 // Load Unsigned Byte (8 bit UNsigned)
6790 instruct loadUB(rRegI dst, memory mem)
6791 %{
6792 match(Set dst (LoadUB mem));
6793
6794 ins_cost(125);
6795 format %{ "movzbl $dst, $mem\t# ubyte" %}
6796
6797 ins_encode %{
6798 __ movzbl($dst$$Register, $mem$$Address);
6799 %}
6800
6801 ins_pipe(ialu_reg_mem);
6802 %}
6803
6804 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6805 instruct loadUB2L(rRegL dst, memory mem)
6806 %{
6807 match(Set dst (ConvI2L (LoadUB mem)));
6808
6809 ins_cost(125);
6810 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6811
6812 ins_encode %{
6813 __ movzbq($dst$$Register, $mem$$Address);
6814 %}
6815
6816 ins_pipe(ialu_reg_mem);
6817 %}
6818
6819 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6820 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6821 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6822 effect(KILL cr);
6823
6824 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6825 "andl $dst, right_n_bits($mask, 8)" %}
6826 ins_encode %{
6827 Register Rdst = $dst$$Register;
6828 __ movzbq(Rdst, $mem$$Address);
6829 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6830 %}
6831 ins_pipe(ialu_reg_mem);
6832 %}
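
// One way the pattern above can arise (hypothetical Java source): for
// "long v = b & 0x1F;" where b is a byte loaded from memory, the ideal
// subtree ConvI2L(AndI(LoadUB, 0x1F)) matches loadUB2L_immI and emits
//   movzbq  dst, [mem]
//   andl    dst, 0x1F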
6833
6834 // Load Short (16 bit signed)
6835 instruct loadS(rRegI dst, memory mem)
6836 %{
6837 match(Set dst (LoadS mem));
6838
6839 ins_cost(125);
6840 format %{ "movswl $dst, $mem\t# short" %}
6841
6842 ins_encode %{
6843 __ movswl($dst$$Register, $mem$$Address);
6844 %}
6845
6846 ins_pipe(ialu_reg_mem);
6847 %}
6848
6849 // Load Short (16 bit signed) to Byte (8 bit signed)
6850 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6851 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6852
6853 ins_cost(125);
6854 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6855 ins_encode %{
6856 __ movsbl($dst$$Register, $mem$$Address);
6857 %}
6858 ins_pipe(ialu_reg_mem);
6859 %}
6860
6861 // Load Short (16 bit signed) into Long Register
6862 instruct loadS2L(rRegL dst, memory mem)
6863 %{
6864 match(Set dst (ConvI2L (LoadS mem)));
6865
6866 ins_cost(125);
6867 format %{ "movswq $dst, $mem\t# short -> long" %}
6868
6869 ins_encode %{
6870 __ movswq($dst$$Register, $mem$$Address);
6871 %}
6872
6873 ins_pipe(ialu_reg_mem);
6874 %}
6875
6876 // Load Unsigned Short/Char (16 bit UNsigned)
6877 instruct loadUS(rRegI dst, memory mem)
6878 %{
6879 match(Set dst (LoadUS mem));
6880
6881 ins_cost(125);
6882 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6883
6884 ins_encode %{
6885 __ movzwl($dst$$Register, $mem$$Address);
6886 %}
6887
6888 ins_pipe(ialu_reg_mem);
6889 %}
6890
6891 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6892 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6893 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6894
6895 ins_cost(125);
6896 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6897 ins_encode %{
6898 __ movsbl($dst$$Register, $mem$$Address);
6899 %}
6900 ins_pipe(ialu_reg_mem);
6901 %}
6902
6903 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6904 instruct loadUS2L(rRegL dst, memory mem)
6905 %{
6906 match(Set dst (ConvI2L (LoadUS mem)));
6907
6908 ins_cost(125);
6909 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6910
6911 ins_encode %{
6912 __ movzwq($dst$$Register, $mem$$Address);
6913 %}
6914
6915 ins_pipe(ialu_reg_mem);
6916 %}
6917
6918 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
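// Masking the zero-extended 16-bit value with 0xFF keeps only the low byte,
// so a single byte zero-extending load (movzbq) covers the whole expression.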
6919 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6920 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6921
6922 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6923 ins_encode %{
6924 __ movzbq($dst$$Register, $mem$$Address);
6925 %}
6926 ins_pipe(ialu_reg_mem);
6927 %}
6928
6929 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
6930 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6931 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6932 effect(KILL cr);
6933
6934 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
6935 "andl $dst, right_n_bits($mask, 16)" %}
6936 ins_encode %{
6937 Register Rdst = $dst$$Register;
6938 __ movzwq(Rdst, $mem$$Address);
6939 __ andl(Rdst, $mask$$constant & right_n_bits(16));
6940 %}
6941 ins_pipe(ialu_reg_mem);
6942 %}
6943
6944 // Load Integer
6945 instruct loadI(rRegI dst, memory mem)
6946 %{
6947 match(Set dst (LoadI mem));
6948
6949 ins_cost(125);
6950 format %{ "movl $dst, $mem\t# int" %}
6951
6952 ins_encode %{
6953 __ movl($dst$$Register, $mem$$Address);
6954 %}
6955
6956 ins_pipe(ialu_reg_mem);
6957 %}
6958
6959 // Load Integer (32 bit signed) to Byte (8 bit signed)
6960 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6961 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6962
6963 ins_cost(125);
6964 format %{ "movsbl $dst, $mem\t# int -> byte" %}
6965 ins_encode %{
6966 __ movsbl($dst$$Register, $mem$$Address);
6967 %}
6968 ins_pipe(ialu_reg_mem);
6969 %}
6970
6971 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6972 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6973 match(Set dst (AndI (LoadI mem) mask));
6974
6975 ins_cost(125);
6976 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
6977 ins_encode %{
6978 __ movzbl($dst$$Register, $mem$$Address);
6979 %}
6980 ins_pipe(ialu_reg_mem);
6981 %}
6982
6983 // Load Integer (32 bit signed) to Short (16 bit signed)
6984 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6985 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6986
6987 ins_cost(125);
6988 format %{ "movswl $dst, $mem\t# int -> short" %}
6989 ins_encode %{
6990 __ movswl($dst$$Register, $mem$$Address);
6991 %}
6992 ins_pipe(ialu_reg_mem);
6993 %}
6994
6995 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6996 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6997 match(Set dst (AndI (LoadI mem) mask));
6998
6999 ins_cost(125);
7000 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7001 ins_encode %{
7002 __ movzwl($dst$$Register, $mem$$Address);
7003 %}
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Integer into Long Register
7008 instruct loadI2L(rRegL dst, memory mem)
7009 %{
7010 match(Set dst (ConvI2L (LoadI mem)));
7011
7012 ins_cost(125);
7013 format %{ "movslq $dst, $mem\t# int -> long" %}
7014
7015 ins_encode %{
7016 __ movslq($dst$$Register, $mem$$Address);
7017 %}
7018
7019 ins_pipe(ialu_reg_mem);
7020 %}
7021
7022 // Load Integer with mask 0xFF into Long Register
7023 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7024 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7025
7026 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7027 ins_encode %{
7028 __ movzbq($dst$$Register, $mem$$Address);
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Integer with mask 0xFFFF into Long Register
7034 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7035 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7036
7037 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041 ins_pipe(ialu_reg_mem);
7042 %}
7043
7044 // Load Integer with a 31-bit mask into Long Register
7045 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7046 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7047 effect(KILL cr);
7048
7049 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7050 "andl $dst, $mask" %}
7051 ins_encode %{
7052 Register Rdst = $dst$$Register;
7053 __ movl(Rdst, $mem$$Address);
7054 __ andl(Rdst, $mask$$constant);
7055 %}
7056 ins_pipe(ialu_reg_mem);
7057 %}
7058
7059 // Load Unsigned Integer into Long Register
7060 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7061 %{
7062 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7063
7064 ins_cost(125);
7065 format %{ "movl $dst, $mem\t# uint -> long" %}
7066
7067 ins_encode %{
7068 __ movl($dst$$Register, $mem$$Address);
7069 %}
7070
7071 ins_pipe(ialu_reg_mem);
7072 %}
7073
7074 // Load Long
7075 instruct loadL(rRegL dst, memory mem)
7076 %{
7077 match(Set dst (LoadL mem));
7078
7079 ins_cost(125);
7080 format %{ "movq $dst, $mem\t# long" %}
7081
7082 ins_encode %{
7083 __ movq($dst$$Register, $mem$$Address);
7084 %}
7085
7086 ins_pipe(ialu_reg_mem); // XXX
7087 %}
7088
7089 // Load Range
7090 instruct loadRange(rRegI dst, memory mem)
7091 %{
7092 match(Set dst (LoadRange mem));
7093
7094 ins_cost(125); // XXX
7095 format %{ "movl $dst, $mem\t# range" %}
7096 ins_encode %{
7097 __ movl($dst$$Register, $mem$$Address);
7098 %}
7099 ins_pipe(ialu_reg_mem);
7100 %}
7101
7102 // Load Pointer
7103 instruct loadP(rRegP dst, memory mem)
7104 %{
7105 match(Set dst (LoadP mem));
7106 predicate(n->as_Load()->barrier_data() == 0);
7107
7108 ins_cost(125); // XXX
7109 format %{ "movq $dst, $mem\t# ptr" %}
7110 ins_encode %{
7111 __ movq($dst$$Register, $mem$$Address);
7112 %}
7113 ins_pipe(ialu_reg_mem); // XXX
7114 %}
7115
7116 // Load Compressed Pointer
7117 instruct loadN(rRegN dst, memory mem)
7118 %{
7119 predicate(n->as_Load()->barrier_data() == 0);
7120 match(Set dst (LoadN mem));
7121
7122 ins_cost(125); // XXX
7123 format %{ "movl $dst, $mem\t# compressed ptr" %}
7124 ins_encode %{
7125 __ movl($dst$$Register, $mem$$Address);
7126 %}
7127 ins_pipe(ialu_reg_mem); // XXX
7128 %}
7129
7130
7131 // Load Klass Pointer
7132 instruct loadKlass(rRegP dst, memory mem)
7133 %{
7134 match(Set dst (LoadKlass mem));
7135
7136 ins_cost(125); // XXX
7137 format %{ "movq $dst, $mem\t# class" %}
7138 ins_encode %{
7139 __ movq($dst$$Register, $mem$$Address);
7140 %}
7141 ins_pipe(ialu_reg_mem); // XXX
7142 %}
7143
7144 // Load narrow Klass Pointer
7145 instruct loadNKlass(rRegN dst, memory mem)
7146 %{
7147 predicate(!UseCompactObjectHeaders);
7148 match(Set dst (LoadNKlass mem));
7149
7150 ins_cost(125); // XXX
7151 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7152 ins_encode %{
7153 __ movl($dst$$Register, $mem$$Address);
7154 %}
7155 ins_pipe(ialu_reg_mem); // XXX
7156 %}
7157
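// With compact object headers the narrow klass pointer lives in the upper
// bits of the mark word, so it is loaded as part of a 32-bit word and then
// shifted into place by markWord::klass_shift_at_offset.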
7158 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7159 %{
7160 predicate(UseCompactObjectHeaders);
7161 match(Set dst (LoadNKlass mem));
7162 effect(KILL cr);
7163 ins_cost(125);
7164 format %{
7165 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7166 "shrl $dst, markWord::klass_shift_at_offset"
7167 %}
7168 ins_encode %{
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7176 %}
7177 ins_pipe(ialu_reg_mem);
7178 %}
7179
7180 // Load Float
7181 instruct loadF(regF dst, memory mem)
7182 %{
7183 match(Set dst (LoadF mem));
7184
7185 ins_cost(145); // XXX
7186 format %{ "movss $dst, $mem\t# float" %}
7187 ins_encode %{
7188 __ movflt($dst$$XMMRegister, $mem$$Address);
7189 %}
7190 ins_pipe(pipe_slow); // XXX
7191 %}
7192
7193 // Load Double
7194 instruct loadD_partial(regD dst, memory mem)
7195 %{
7196 predicate(!UseXmmLoadAndClearUpper);
7197 match(Set dst (LoadD mem));
7198
7199 ins_cost(145); // XXX
7200 format %{ "movlpd $dst, $mem\t# double" %}
7201 ins_encode %{
7202 __ movdbl($dst$$XMMRegister, $mem$$Address);
7203 %}
7204 ins_pipe(pipe_slow); // XXX
7205 %}
7206
7207 instruct loadD(regD dst, memory mem)
7208 %{
7209 predicate(UseXmmLoadAndClearUpper);
7210 match(Set dst (LoadD mem));
7211
7212 ins_cost(145); // XXX
7213 format %{ "movsd $dst, $mem\t# double" %}
7214 ins_encode %{
7215 __ movdbl($dst$$XMMRegister, $mem$$Address);
7216 %}
7217 ins_pipe(pipe_slow); // XXX
7218 %}
7219
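
// Java's Math.max/min have corner cases that raw maxss/minss do not honor:
// NaN must propagate from either input, and max(-0.0f, +0.0f) must be
// +0.0f, while maxss simply returns the second operand when the inputs
// compare equal or unordered. The AVX10.2 forms below implement these
// semantics directly; the older AVX forms repair them using the extra
// TEMP registers.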
7220 // max = java.lang.Math.max(float a, float b)
7221 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7222 predicate(VM_Version::supports_avx10_2());
7223 match(Set dst (MaxF a b));
7224 format %{ "maxF $dst, $a, $b" %}
7225 ins_encode %{
7226 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7227 %}
7228 ins_pipe( pipe_slow );
7229 %}
7230
7231 // max = java.lang.Math.max(float a, float b)
7232 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7233 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7234 match(Set dst (MaxF a b));
7235 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7236 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7237 ins_encode %{
7238 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7239 %}
7240 ins_pipe( pipe_slow );
7241 %}
7242
7243 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7244 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7245 match(Set dst (MaxF a b));
7246 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7247
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7249 ins_encode %{
7250 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7251 false /*min*/, true /*single*/);
7252 %}
7253 ins_pipe( pipe_slow );
7254 %}
7255
7256 // max = java.lang.Math.max(double a, double b)
7257 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7258 predicate(VM_Version::supports_avx10_2());
7259 match(Set dst (MaxD a b));
7260 format %{ "maxD $dst, $a, $b" %}
7261 ins_encode %{
7262 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7263 %}
7264 ins_pipe( pipe_slow );
7265 %}
7266
7267 // max = java.lang.Math.max(double a, double b)
7268 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7269 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7270 match(Set dst (MaxD a b));
7271 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7272 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7273 ins_encode %{
7274 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7275 %}
7276 ins_pipe( pipe_slow );
7277 %}
7278
7279 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7280 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7281 match(Set dst (MaxD a b));
7282 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7283
7284 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7285 ins_encode %{
7286 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7287 false /*min*/, false /*single*/);
7288 %}
7289 ins_pipe( pipe_slow );
7290 %}
7291
// min = java.lang.Math.min(float a, float b)
7293 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7294 predicate(VM_Version::supports_avx10_2());
7295 match(Set dst (MinF a b));
7296 format %{ "minF $dst, $a, $b" %}
7297 ins_encode %{
7298 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7299 %}
7300 ins_pipe( pipe_slow );
7301 %}
7302
7303 // min = java.lang.Math.min(float a, float b)
7304 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7305 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7306 match(Set dst (MinF a b));
7307 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7308 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7309 ins_encode %{
7310 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7311 %}
7312 ins_pipe( pipe_slow );
7313 %}
7314
7315 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7316 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7317 match(Set dst (MinF a b));
7318 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7319
7320 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7321 ins_encode %{
7322 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7323 true /*min*/, true /*single*/);
7324 %}
7325 ins_pipe( pipe_slow );
7326 %}
7327
// min = java.lang.Math.min(double a, double b)
7329 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7330 predicate(VM_Version::supports_avx10_2());
7331 match(Set dst (MinD a b));
7332 format %{ "minD $dst, $a, $b" %}
7333 ins_encode %{
7334 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7335 %}
7336 ins_pipe( pipe_slow );
7337 %}
7338
7339 // min = java.lang.Math.min(double a, double b)
7340 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7341 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7342 match(Set dst (MinD a b));
7343 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7344 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7345 ins_encode %{
7346 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7347 %}
7348 ins_pipe( pipe_slow );
7349 %}
7350
7351 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7352 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7353 match(Set dst (MinD a b));
7354 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7355
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7357 ins_encode %{
7358 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7359 true /*min*/, false /*single*/);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 // Load Effective Address
7365 instruct leaP8(rRegP dst, indOffset8 mem)
7366 %{
7367 match(Set dst mem);
7368
7369 ins_cost(110); // XXX
7370 format %{ "leaq $dst, $mem\t# ptr 8" %}
7371 ins_encode %{
7372 __ leaq($dst$$Register, $mem$$Address);
7373 %}
7374 ins_pipe(ialu_reg_reg_fat);
7375 %}
7376
7377 instruct leaP32(rRegP dst, indOffset32 mem)
7378 %{
7379 match(Set dst mem);
7380
7381 ins_cost(110);
7382 format %{ "leaq $dst, $mem\t# ptr 32" %}
7383 ins_encode %{
7384 __ leaq($dst$$Register, $mem$$Address);
7385 %}
7386 ins_pipe(ialu_reg_reg_fat);
7387 %}
7388
7389 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7390 %{
7391 match(Set dst mem);
7392
7393 ins_cost(110);
7394 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7395 ins_encode %{
7396 __ leaq($dst$$Register, $mem$$Address);
7397 %}
7398 ins_pipe(ialu_reg_reg_fat);
7399 %}
7400
7401 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7402 %{
7403 match(Set dst mem);
7404
7405 ins_cost(110);
7406 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7407 ins_encode %{
7408 __ leaq($dst$$Register, $mem$$Address);
7409 %}
7410 ins_pipe(ialu_reg_reg_fat);
7411 %}
7412
7413 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7414 %{
7415 match(Set dst mem);
7416
7417 ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
7419 ins_encode %{
7420 __ leaq($dst$$Register, $mem$$Address);
7421 %}
7422 ins_pipe(ialu_reg_reg_fat);
7423 %}
7424
7425 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7426 %{
7427 match(Set dst mem);
7428
7429 ins_cost(110);
7430 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7431 ins_encode %{
7432 __ leaq($dst$$Register, $mem$$Address);
7433 %}
7434 ins_pipe(ialu_reg_reg_fat);
7435 %}
7436
7437 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7438 %{
7439 match(Set dst mem);
7440
7441 ins_cost(110);
7442 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7443 ins_encode %{
7444 __ leaq($dst$$Register, $mem$$Address);
7445 %}
7446 ins_pipe(ialu_reg_reg_fat);
7447 %}
7448
7449 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7450 %{
7451 match(Set dst mem);
7452
7453 ins_cost(110);
7454 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7455 ins_encode %{
7456 __ leaq($dst$$Register, $mem$$Address);
7457 %}
7458 ins_pipe(ialu_reg_reg_fat);
7459 %}
7460
// Load Effective Address which uses a narrow (32-bit) oop
7462 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7463 %{
7464 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7465 match(Set dst mem);
7466
7467 ins_cost(110);
7468 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7469 ins_encode %{
7470 __ leaq($dst$$Register, $mem$$Address);
7471 %}
7472 ins_pipe(ialu_reg_reg_fat);
7473 %}
7474
7475 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7476 %{
7477 predicate(CompressedOops::shift() == 0);
7478 match(Set dst mem);
7479
7480 ins_cost(110); // XXX
7481 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7482 ins_encode %{
7483 __ leaq($dst$$Register, $mem$$Address);
7484 %}
7485 ins_pipe(ialu_reg_reg_fat);
7486 %}
7487
7488 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7489 %{
7490 predicate(CompressedOops::shift() == 0);
7491 match(Set dst mem);
7492
7493 ins_cost(110);
7494 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7495 ins_encode %{
7496 __ leaq($dst$$Register, $mem$$Address);
7497 %}
7498 ins_pipe(ialu_reg_reg_fat);
7499 %}
7500
7501 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7502 %{
7503 predicate(CompressedOops::shift() == 0);
7504 match(Set dst mem);
7505
7506 ins_cost(110);
7507 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7508 ins_encode %{
7509 __ leaq($dst$$Register, $mem$$Address);
7510 %}
7511 ins_pipe(ialu_reg_reg_fat);
7512 %}
7513
7514 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7515 %{
7516 predicate(CompressedOops::shift() == 0);
7517 match(Set dst mem);
7518
7519 ins_cost(110);
7520 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7521 ins_encode %{
7522 __ leaq($dst$$Register, $mem$$Address);
7523 %}
7524 ins_pipe(ialu_reg_reg_fat);
7525 %}
7526
7527 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7528 %{
7529 predicate(CompressedOops::shift() == 0);
7530 match(Set dst mem);
7531
7532 ins_cost(110);
7533 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7534 ins_encode %{
7535 __ leaq($dst$$Register, $mem$$Address);
7536 %}
7537 ins_pipe(ialu_reg_reg_fat);
7538 %}
7539
7540 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7541 %{
7542 predicate(CompressedOops::shift() == 0);
7543 match(Set dst mem);
7544
7545 ins_cost(110);
7546 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7547 ins_encode %{
7548 __ leaq($dst$$Register, $mem$$Address);
7549 %}
7550 ins_pipe(ialu_reg_reg_fat);
7551 %}
7552
7553 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7554 %{
7555 predicate(CompressedOops::shift() == 0);
7556 match(Set dst mem);
7557
7558 ins_cost(110);
7559 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7560 ins_encode %{
7561 __ leaq($dst$$Register, $mem$$Address);
7562 %}
7563 ins_pipe(ialu_reg_reg_fat);
7564 %}
7565
7566 instruct loadConI(rRegI dst, immI src)
7567 %{
7568 match(Set dst src);
7569
7570 format %{ "movl $dst, $src\t# int" %}
7571 ins_encode %{
7572 __ movl($dst$$Register, $src$$constant);
7573 %}
7574 ins_pipe(ialu_reg_fat); // XXX
7575 %}
7576
7577 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7578 %{
7579 match(Set dst src);
7580 effect(KILL cr);
7581
7582 ins_cost(50);
7583 format %{ "xorl $dst, $dst\t# int" %}
7584 ins_encode %{
7585 __ xorl($dst$$Register, $dst$$Register);
7586 %}
7587 ins_pipe(ialu_reg);
7588 %}
7589
7590 instruct loadConL(rRegL dst, immL src)
7591 %{
7592 match(Set dst src);
7593
7594 ins_cost(150);
7595 format %{ "movq $dst, $src\t# long" %}
7596 ins_encode %{
7597 __ mov64($dst$$Register, $src$$constant);
7598 %}
7599 ins_pipe(ialu_reg);
7600 %}
7601
7602 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7603 %{
7604 match(Set dst src);
7605 effect(KILL cr);
7606
7607 ins_cost(50);
7608 format %{ "xorl $dst, $dst\t# long" %}
7609 ins_encode %{
7610 __ xorl($dst$$Register, $dst$$Register);
7611 %}
7612 ins_pipe(ialu_reg); // XXX
7613 %}
7614
7615 instruct loadConUL32(rRegL dst, immUL32 src)
7616 %{
7617 match(Set dst src);
7618
7619 ins_cost(60);
7620 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7621 ins_encode %{
7622 __ movl($dst$$Register, $src$$constant);
7623 %}
7624 ins_pipe(ialu_reg);
7625 %}
7626
7627 instruct loadConL32(rRegL dst, immL32 src)
7628 %{
7629 match(Set dst src);
7630
7631 ins_cost(70);
7632 format %{ "movq $dst, $src\t# long (32-bit)" %}
7633 ins_encode %{
7634 __ movq($dst$$Register, $src$$constant);
7635 %}
7636 ins_pipe(ialu_reg);
7637 %}
7638
7639 instruct loadConP(rRegP dst, immP con) %{
7640 match(Set dst con);
7641
7642 format %{ "movq $dst, $con\t# ptr" %}
7643 ins_encode %{
7644 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7645 %}
7646 ins_pipe(ialu_reg_fat); // XXX
7647 %}
7648
7649 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7650 %{
7651 match(Set dst src);
7652 effect(KILL cr);
7653
7654 ins_cost(50);
7655 format %{ "xorl $dst, $dst\t# ptr" %}
7656 ins_encode %{
7657 __ xorl($dst$$Register, $dst$$Register);
7658 %}
7659 ins_pipe(ialu_reg);
7660 %}
7661
7662 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7663 %{
7664 match(Set dst src);
7665 effect(KILL cr);
7666
7667 ins_cost(60);
7668 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7669 ins_encode %{
7670 __ movl($dst$$Register, $src$$constant);
7671 %}
7672 ins_pipe(ialu_reg);
7673 %}
7674
7675 instruct loadConF(regF dst, immF con) %{
7676 match(Set dst con);
7677 ins_cost(125);
7678 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7679 ins_encode %{
7680 __ movflt($dst$$XMMRegister, $constantaddress($con));
7681 %}
7682 ins_pipe(pipe_slow);
7683 %}
7684
7685 instruct loadConH(regF dst, immH con) %{
7686 match(Set dst con);
7687 ins_cost(125);
7688 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7689 ins_encode %{
7690 __ movflt($dst$$XMMRegister, $constantaddress($con));
7691 %}
7692 ins_pipe(pipe_slow);
7693 %}
7694
7695 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7696 match(Set dst src);
7697 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7699 ins_encode %{
7700 __ xorq($dst$$Register, $dst$$Register);
7701 %}
7702 ins_pipe(ialu_reg);
7703 %}
7704
7705 instruct loadConN(rRegN dst, immN src) %{
7706 match(Set dst src);
7707
7708 ins_cost(125);
7709 format %{ "movl $dst, $src\t# compressed ptr" %}
7710 ins_encode %{
7711 address con = (address)$src$$constant;
7712 if (con == nullptr) {
7713 ShouldNotReachHere();
7714 } else {
7715 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7716 }
7717 %}
7718 ins_pipe(ialu_reg_fat); // XXX
7719 %}
7720
7721 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7722 match(Set dst src);
7723
7724 ins_cost(125);
7725 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7726 ins_encode %{
7727 address con = (address)$src$$constant;
7728 if (con == nullptr) {
7729 ShouldNotReachHere();
7730 } else {
7731 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7732 }
7733 %}
7734 ins_pipe(ialu_reg_fat); // XXX
7735 %}
7736
7737 instruct loadConF0(regF dst, immF0 src)
7738 %{
7739 match(Set dst src);
7740 ins_cost(100);
7741
7742 format %{ "xorps $dst, $dst\t# float 0.0" %}
7743 ins_encode %{
7744 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7745 %}
7746 ins_pipe(pipe_slow);
7747 %}
7748
// Use the same format since predicate() cannot be used here.
7750 instruct loadConD(regD dst, immD con) %{
7751 match(Set dst con);
7752 ins_cost(125);
7753 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7754 ins_encode %{
7755 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7756 %}
7757 ins_pipe(pipe_slow);
7758 %}
7759
7760 instruct loadConD0(regD dst, immD0 src)
7761 %{
7762 match(Set dst src);
7763 ins_cost(100);
7764
7765 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7766 ins_encode %{
7767 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7768 %}
7769 ins_pipe(pipe_slow);
7770 %}
7771
7772 instruct loadSSI(rRegI dst, stackSlotI src)
7773 %{
7774 match(Set dst src);
7775
7776 ins_cost(125);
7777 format %{ "movl $dst, $src\t# int stk" %}
7778 ins_encode %{
7779 __ movl($dst$$Register, $src$$Address);
7780 %}
7781 ins_pipe(ialu_reg_mem);
7782 %}
7783
7784 instruct loadSSL(rRegL dst, stackSlotL src)
7785 %{
7786 match(Set dst src);
7787
7788 ins_cost(125);
7789 format %{ "movq $dst, $src\t# long stk" %}
7790 ins_encode %{
7791 __ movq($dst$$Register, $src$$Address);
7792 %}
7793 ins_pipe(ialu_reg_mem);
7794 %}
7795
7796 instruct loadSSP(rRegP dst, stackSlotP src)
7797 %{
7798 match(Set dst src);
7799
7800 ins_cost(125);
7801 format %{ "movq $dst, $src\t# ptr stk" %}
7802 ins_encode %{
7803 __ movq($dst$$Register, $src$$Address);
7804 %}
7805 ins_pipe(ialu_reg_mem);
7806 %}
7807
7808 instruct loadSSF(regF dst, stackSlotF src)
7809 %{
7810 match(Set dst src);
7811
7812 ins_cost(125);
7813 format %{ "movss $dst, $src\t# float stk" %}
7814 ins_encode %{
7815 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7816 %}
7817 ins_pipe(pipe_slow); // XXX
7818 %}
7819
// Use the same format since predicate() cannot be used here.
7821 instruct loadSSD(regD dst, stackSlotD src)
7822 %{
7823 match(Set dst src);
7824
7825 ins_cost(125);
7826 format %{ "movsd $dst, $src\t# double stk" %}
7827 ins_encode %{
7828 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7829 %}
7830 ins_pipe(pipe_slow); // XXX
7831 %}
7832
7833 // Prefetch instructions for allocation.
7834 // Must be safe to execute with invalid address (cannot fault).
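// AllocatePrefetchInstr selects the flavor emitted below:
//   0 => prefetchnta, 1 => prefetcht0, 2 => prefetcht2, 3 => prefetchw.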
7835
7836 instruct prefetchAlloc( memory mem ) %{
7837 predicate(AllocatePrefetchInstr==3);
7838 match(PrefetchAllocation mem);
7839 ins_cost(125);
7840
7841 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7842 ins_encode %{
7843 __ prefetchw($mem$$Address);
7844 %}
7845 ins_pipe(ialu_mem);
7846 %}
7847
7848 instruct prefetchAllocNTA( memory mem ) %{
7849 predicate(AllocatePrefetchInstr==0);
7850 match(PrefetchAllocation mem);
7851 ins_cost(125);
7852
7853 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7854 ins_encode %{
7855 __ prefetchnta($mem$$Address);
7856 %}
7857 ins_pipe(ialu_mem);
7858 %}
7859
7860 instruct prefetchAllocT0( memory mem ) %{
7861 predicate(AllocatePrefetchInstr==1);
7862 match(PrefetchAllocation mem);
7863 ins_cost(125);
7864
7865 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7866 ins_encode %{
7867 __ prefetcht0($mem$$Address);
7868 %}
7869 ins_pipe(ialu_mem);
7870 %}
7871
7872 instruct prefetchAllocT2( memory mem ) %{
7873 predicate(AllocatePrefetchInstr==2);
7874 match(PrefetchAllocation mem);
7875 ins_cost(125);
7876
7877 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7878 ins_encode %{
7879 __ prefetcht2($mem$$Address);
7880 %}
7881 ins_pipe(ialu_mem);
7882 %}
7883
7884 //----------Store Instructions-------------------------------------------------
7885
7886 // Store Byte
7887 instruct storeB(memory mem, rRegI src)
7888 %{
7889 match(Set mem (StoreB mem src));
7890
7891 ins_cost(125); // XXX
7892 format %{ "movb $mem, $src\t# byte" %}
7893 ins_encode %{
7894 __ movb($mem$$Address, $src$$Register);
7895 %}
7896 ins_pipe(ialu_mem_reg);
7897 %}
7898
7899 // Store Char/Short
7900 instruct storeC(memory mem, rRegI src)
7901 %{
7902 match(Set mem (StoreC mem src));
7903
7904 ins_cost(125); // XXX
7905 format %{ "movw $mem, $src\t# char/short" %}
7906 ins_encode %{
7907 __ movw($mem$$Address, $src$$Register);
7908 %}
7909 ins_pipe(ialu_mem_reg);
7910 %}
7911
7912 // Store Integer
7913 instruct storeI(memory mem, rRegI src)
7914 %{
7915 match(Set mem (StoreI mem src));
7916
7917 ins_cost(125); // XXX
7918 format %{ "movl $mem, $src\t# int" %}
7919 ins_encode %{
7920 __ movl($mem$$Address, $src$$Register);
7921 %}
7922 ins_pipe(ialu_mem_reg);
7923 %}
7924
7925 // Store Long
7926 instruct storeL(memory mem, rRegL src)
7927 %{
7928 match(Set mem (StoreL mem src));
7929
7930 ins_cost(125); // XXX
7931 format %{ "movq $mem, $src\t# long" %}
7932 ins_encode %{
7933 __ movq($mem$$Address, $src$$Register);
7934 %}
7935 ins_pipe(ialu_mem_reg); // XXX
7936 %}
7937
7938 // Store Pointer
7939 instruct storeP(memory mem, any_RegP src)
7940 %{
7941 predicate(n->as_Store()->barrier_data() == 0);
7942 match(Set mem (StoreP mem src));
7943
7944 ins_cost(125); // XXX
7945 format %{ "movq $mem, $src\t# ptr" %}
7946 ins_encode %{
7947 __ movq($mem$$Address, $src$$Register);
7948 %}
7949 ins_pipe(ialu_mem_reg);
7950 %}
7951
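// Several zero-store forms below reuse R12 as a zero register: with
// compressed oops and a null heap base, R12 permanently holds the heap
// base value 0, which avoids encoding an immediate in the store.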
7952 instruct storeImmP0(memory mem, immP0 zero)
7953 %{
7954 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
7955 match(Set mem (StoreP mem zero));
7956
7957 ins_cost(125); // XXX
7958 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
7959 ins_encode %{
7960 __ movq($mem$$Address, r12);
7961 %}
7962 ins_pipe(ialu_mem_reg);
7963 %}
7964
7965 // Store Null Pointer, mark word, or other simple pointer constant.
7966 instruct storeImmP(memory mem, immP31 src)
7967 %{
7968 predicate(n->as_Store()->barrier_data() == 0);
7969 match(Set mem (StoreP mem src));
7970
7971 ins_cost(150); // XXX
7972 format %{ "movq $mem, $src\t# ptr" %}
7973 ins_encode %{
7974 __ movq($mem$$Address, $src$$constant);
7975 %}
7976 ins_pipe(ialu_mem_imm);
7977 %}
7978
7979 // Store Compressed Pointer
7980 instruct storeN(memory mem, rRegN src)
7981 %{
7982 predicate(n->as_Store()->barrier_data() == 0);
7983 match(Set mem (StoreN mem src));
7984
7985 ins_cost(125); // XXX
7986 format %{ "movl $mem, $src\t# compressed ptr" %}
7987 ins_encode %{
7988 __ movl($mem$$Address, $src$$Register);
7989 %}
7990 ins_pipe(ialu_mem_reg);
7991 %}
7992
7993 instruct storeNKlass(memory mem, rRegN src)
7994 %{
7995 match(Set mem (StoreNKlass mem src));
7996
7997 ins_cost(125); // XXX
7998 format %{ "movl $mem, $src\t# compressed klass ptr" %}
7999 ins_encode %{
8000 __ movl($mem$$Address, $src$$Register);
8001 %}
8002 ins_pipe(ialu_mem_reg);
8003 %}
8004
8005 instruct storeImmN0(memory mem, immN0 zero)
8006 %{
8007 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8008 match(Set mem (StoreN mem zero));
8009
8010 ins_cost(125); // XXX
8011 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8012 ins_encode %{
8013 __ movl($mem$$Address, r12);
8014 %}
8015 ins_pipe(ialu_mem_reg);
8016 %}
8017
8018 instruct storeImmN(memory mem, immN src)
8019 %{
8020 predicate(n->as_Store()->barrier_data() == 0);
8021 match(Set mem (StoreN mem src));
8022
8023 ins_cost(150); // XXX
8024 format %{ "movl $mem, $src\t# compressed ptr" %}
8025 ins_encode %{
8026 address con = (address)$src$$constant;
8027 if (con == nullptr) {
8028 __ movl($mem$$Address, 0);
8029 } else {
8030 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8031 }
8032 %}
8033 ins_pipe(ialu_mem_imm);
8034 %}
8035
8036 instruct storeImmNKlass(memory mem, immNKlass src)
8037 %{
8038 match(Set mem (StoreNKlass mem src));
8039
8040 ins_cost(150); // XXX
8041 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8042 ins_encode %{
8043 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8044 %}
8045 ins_pipe(ialu_mem_imm);
8046 %}
8047
8048 // Store Integer Immediate
8049 instruct storeImmI0(memory mem, immI_0 zero)
8050 %{
8051 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8052 match(Set mem (StoreI mem zero));
8053
8054 ins_cost(125); // XXX
8055 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8056 ins_encode %{
8057 __ movl($mem$$Address, r12);
8058 %}
8059 ins_pipe(ialu_mem_reg);
8060 %}
8061
8062 instruct storeImmI(memory mem, immI src)
8063 %{
8064 match(Set mem (StoreI mem src));
8065
8066 ins_cost(150);
8067 format %{ "movl $mem, $src\t# int" %}
8068 ins_encode %{
8069 __ movl($mem$$Address, $src$$constant);
8070 %}
8071 ins_pipe(ialu_mem_imm);
8072 %}
8073
8074 // Store Long Immediate
8075 instruct storeImmL0(memory mem, immL0 zero)
8076 %{
8077 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8078 match(Set mem (StoreL mem zero));
8079
8080 ins_cost(125); // XXX
8081 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8082 ins_encode %{
8083 __ movq($mem$$Address, r12);
8084 %}
8085 ins_pipe(ialu_mem_reg);
8086 %}
8087
8088 instruct storeImmL(memory mem, immL32 src)
8089 %{
8090 match(Set mem (StoreL mem src));
8091
8092 ins_cost(150);
8093 format %{ "movq $mem, $src\t# long" %}
8094 ins_encode %{
8095 __ movq($mem$$Address, $src$$constant);
8096 %}
8097 ins_pipe(ialu_mem_imm);
8098 %}
8099
8100 // Store Short/Char Immediate
8101 instruct storeImmC0(memory mem, immI_0 zero)
8102 %{
8103 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8104 match(Set mem (StoreC mem zero));
8105
8106 ins_cost(125); // XXX
8107 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8108 ins_encode %{
8109 __ movw($mem$$Address, r12);
8110 %}
8111 ins_pipe(ialu_mem_reg);
8112 %}
8113
8114 instruct storeImmI16(memory mem, immI16 src)
8115 %{
8116 predicate(UseStoreImmI16);
8117 match(Set mem (StoreC mem src));
8118
8119 ins_cost(150);
8120 format %{ "movw $mem, $src\t# short/char" %}
8121 ins_encode %{
8122 __ movw($mem$$Address, $src$$constant);
8123 %}
8124 ins_pipe(ialu_mem_imm);
8125 %}
8126
8127 // Store Byte Immediate
8128 instruct storeImmB0(memory mem, immI_0 zero)
8129 %{
8130 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8131 match(Set mem (StoreB mem zero));
8132
8133 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8135 ins_encode %{
8136 __ movb($mem$$Address, r12);
8137 %}
8138 ins_pipe(ialu_mem_reg);
8139 %}
8140
8141 instruct storeImmB(memory mem, immI8 src)
8142 %{
8143 match(Set mem (StoreB mem src));
8144
8145 ins_cost(150); // XXX
8146 format %{ "movb $mem, $src\t# byte" %}
8147 ins_encode %{
8148 __ movb($mem$$Address, $src$$constant);
8149 %}
8150 ins_pipe(ialu_mem_imm);
8151 %}
8152
8153 // Store Float
8154 instruct storeF(memory mem, regF src)
8155 %{
8156 match(Set mem (StoreF mem src));
8157
8158 ins_cost(95); // XXX
8159 format %{ "movss $mem, $src\t# float" %}
8160 ins_encode %{
8161 __ movflt($mem$$Address, $src$$XMMRegister);
8162 %}
8163 ins_pipe(pipe_slow); // XXX
8164 %}
8165
// Store immediate float value (faster than storing from an XMM register)
8167 instruct storeF0(memory mem, immF0 zero)
8168 %{
8169 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8170 match(Set mem (StoreF mem zero));
8171
8172 ins_cost(25); // XXX
8173 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8174 ins_encode %{
8175 __ movl($mem$$Address, r12);
8176 %}
8177 ins_pipe(ialu_mem_reg);
8178 %}
8179
8180 instruct storeF_imm(memory mem, immF src)
8181 %{
8182 match(Set mem (StoreF mem src));
8183
8184 ins_cost(50);
8185 format %{ "movl $mem, $src\t# float" %}
8186 ins_encode %{
8187 __ movl($mem$$Address, jint_cast($src$$constant));
8188 %}
8189 ins_pipe(ialu_mem_imm);
8190 %}
8191
8192 // Store Double
8193 instruct storeD(memory mem, regD src)
8194 %{
8195 match(Set mem (StoreD mem src));
8196
8197 ins_cost(95); // XXX
8198 format %{ "movsd $mem, $src\t# double" %}
8199 ins_encode %{
8200 __ movdbl($mem$$Address, $src$$XMMRegister);
8201 %}
8202 ins_pipe(pipe_slow); // XXX
8203 %}
8204
// Store immediate double 0.0 (faster than storing from an XMM register)
8206 instruct storeD0_imm(memory mem, immD0 src)
8207 %{
8208 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8209 match(Set mem (StoreD mem src));
8210
8211 ins_cost(50);
8212 format %{ "movq $mem, $src\t# double 0." %}
8213 ins_encode %{
8214 __ movq($mem$$Address, $src$$constant);
8215 %}
8216 ins_pipe(ialu_mem_imm);
8217 %}
8218
8219 instruct storeD0(memory mem, immD0 zero)
8220 %{
8221 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8222 match(Set mem (StoreD mem zero));
8223
8224 ins_cost(25); // XXX
8225 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8226 ins_encode %{
8227 __ movq($mem$$Address, r12);
8228 %}
8229 ins_pipe(ialu_mem_reg);
8230 %}
8231
8232 instruct storeSSI(stackSlotI dst, rRegI src)
8233 %{
8234 match(Set dst src);
8235
8236 ins_cost(100);
8237 format %{ "movl $dst, $src\t# int stk" %}
8238 ins_encode %{
8239 __ movl($dst$$Address, $src$$Register);
8240 %}
8241 ins_pipe( ialu_mem_reg );
8242 %}
8243
8244 instruct storeSSL(stackSlotL dst, rRegL src)
8245 %{
8246 match(Set dst src);
8247
8248 ins_cost(100);
8249 format %{ "movq $dst, $src\t# long stk" %}
8250 ins_encode %{
8251 __ movq($dst$$Address, $src$$Register);
8252 %}
8253 ins_pipe(ialu_mem_reg);
8254 %}
8255
8256 instruct storeSSP(stackSlotP dst, rRegP src)
8257 %{
8258 match(Set dst src);
8259
8260 ins_cost(100);
8261 format %{ "movq $dst, $src\t# ptr stk" %}
8262 ins_encode %{
8263 __ movq($dst$$Address, $src$$Register);
8264 %}
8265 ins_pipe(ialu_mem_reg);
8266 %}
8267
8268 instruct storeSSF(stackSlotF dst, regF src)
8269 %{
8270 match(Set dst src);
8271
8272 ins_cost(95); // XXX
8273 format %{ "movss $dst, $src\t# float stk" %}
8274 ins_encode %{
8275 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8276 %}
8277 ins_pipe(pipe_slow); // XXX
8278 %}
8279
8280 instruct storeSSD(stackSlotD dst, regD src)
8281 %{
8282 match(Set dst src);
8283
8284 ins_cost(95); // XXX
8285 format %{ "movsd $dst, $src\t# double stk" %}
8286 ins_encode %{
8287 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8288 %}
8289 ins_pipe(pipe_slow); // XXX
8290 %}
8291
8292 instruct cacheWB(indirect addr)
8293 %{
8294 predicate(VM_Version::supports_data_cache_line_flush());
8295 match(CacheWB addr);
8296
8297 ins_cost(100);
  format %{ "cache wb $addr" %}
8299 ins_encode %{
8300 assert($addr->index_position() < 0, "should be");
8301 assert($addr$$disp == 0, "should be");
8302 __ cache_wb(Address($addr$$base$$Register, 0));
8303 %}
8304 ins_pipe(pipe_slow); // XXX
8305 %}
8306
8307 instruct cacheWBPreSync()
8308 %{
8309 predicate(VM_Version::supports_data_cache_line_flush());
8310 match(CacheWBPreSync);
8311
8312 ins_cost(100);
  format %{ "cache wb presync" %}
8314 ins_encode %{
8315 __ cache_wbsync(true);
8316 %}
8317 ins_pipe(pipe_slow); // XXX
8318 %}
8319
8320 instruct cacheWBPostSync()
8321 %{
8322 predicate(VM_Version::supports_data_cache_line_flush());
8323 match(CacheWBPostSync);
8324
8325 ins_cost(100);
  format %{ "cache wb postsync" %}
8327 ins_encode %{
8328 __ cache_wbsync(false);
8329 %}
8330 ins_pipe(pipe_slow); // XXX
8331 %}
8332
8333 //----------BSWAP Instructions-------------------------------------------------
8334 instruct bytes_reverse_int(rRegI dst) %{
8335 match(Set dst (ReverseBytesI dst));
8336
8337 format %{ "bswapl $dst" %}
8338 ins_encode %{
8339 __ bswapl($dst$$Register);
8340 %}
8341 ins_pipe( ialu_reg );
8342 %}
8343
8344 instruct bytes_reverse_long(rRegL dst) %{
8345 match(Set dst (ReverseBytesL dst));
8346
8347 format %{ "bswapq $dst" %}
8348 ins_encode %{
8349 __ bswapq($dst$$Register);
8350 %}
8351 ins_pipe( ialu_reg);
8352 %}
8353
8354 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8355 match(Set dst (ReverseBytesUS dst));
8356 effect(KILL cr);
8357
8358 format %{ "bswapl $dst\n\t"
8359 "shrl $dst,16\n\t" %}
8360 ins_encode %{
8361 __ bswapl($dst$$Register);
8362 __ shrl($dst$$Register, 16);
8363 %}
8364 ins_pipe( ialu_reg );
8365 %}
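
// Worked example for the unsigned-short reversal above: 0x00001234
// bswapl -> 0x34120000, then shrl 16 -> 0x00003412. The signed variant
// below uses sarl instead, so the result is sign-extended from bit 15.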
8366
8367 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8368 match(Set dst (ReverseBytesS dst));
8369 effect(KILL cr);
8370
8371 format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
8373 ins_encode %{
8374 __ bswapl($dst$$Register);
8375 __ sarl($dst$$Register, 16);
8376 %}
8377 ins_pipe( ialu_reg );
8378 %}
8379
8380 //---------- Zeros Count Instructions ------------------------------------------
8381
8382 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8383 predicate(UseCountLeadingZerosInstruction);
8384 match(Set dst (CountLeadingZerosI src));
8385 effect(KILL cr);
8386
8387 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8388 ins_encode %{
8389 __ lzcntl($dst$$Register, $src$$Register);
8390 %}
8391 ins_pipe(ialu_reg);
8392 %}
8393
8394 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8395 predicate(UseCountLeadingZerosInstruction);
8396 match(Set dst (CountLeadingZerosI (LoadI src)));
8397 effect(KILL cr);
8398 ins_cost(175);
8399 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8400 ins_encode %{
8401 __ lzcntl($dst$$Register, $src$$Address);
8402 %}
8403 ins_pipe(ialu_reg_mem);
8404 %}
8405
8406 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8407 predicate(!UseCountLeadingZerosInstruction);
8408 match(Set dst (CountLeadingZerosI src));
8409 effect(KILL cr);
8410
8411 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8412 "jnz skip\n\t"
8413 "movl $dst, -1\n"
8414 "skip:\n\t"
8415 "negl $dst\n\t"
8416 "addl $dst, 31" %}
8417 ins_encode %{
8418 Register Rdst = $dst$$Register;
8419 Register Rsrc = $src$$Register;
8420 Label skip;
8421 __ bsrl(Rdst, Rsrc);
8422 __ jccb(Assembler::notZero, skip);
8423 __ movl(Rdst, -1);
8424 __ bind(skip);
8425 __ negl(Rdst);
8426 __ addl(Rdst, BitsPerInt - 1);
8427 %}
8428 ins_pipe(ialu_reg);
8429 %}
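
// The bsr fallback computes 31 - index(highest set bit): e.g. for
// src = 0x00010000, bsrl yields 16, negl gives -16, and addl 31 gives 15,
// matching lzcntl. For src == 0 the branch is not taken, dst is forced to
// -1, and -(-1) + 31 = 32, the defined result for a zero input.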
8430
8431 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8432 predicate(UseCountLeadingZerosInstruction);
8433 match(Set dst (CountLeadingZerosL src));
8434 effect(KILL cr);
8435
8436 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8437 ins_encode %{
8438 __ lzcntq($dst$$Register, $src$$Register);
8439 %}
8440 ins_pipe(ialu_reg);
8441 %}
8442
8443 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8444 predicate(UseCountLeadingZerosInstruction);
8445 match(Set dst (CountLeadingZerosL (LoadL src)));
8446 effect(KILL cr);
8447 ins_cost(175);
8448 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8449 ins_encode %{
8450 __ lzcntq($dst$$Register, $src$$Address);
8451 %}
8452 ins_pipe(ialu_reg_mem);
8453 %}
8454
8455 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8456 predicate(!UseCountLeadingZerosInstruction);
8457 match(Set dst (CountLeadingZerosL src));
8458 effect(KILL cr);
8459
8460 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8461 "jnz skip\n\t"
8462 "movl $dst, -1\n"
8463 "skip:\n\t"
8464 "negl $dst\n\t"
8465 "addl $dst, 63" %}
8466 ins_encode %{
8467 Register Rdst = $dst$$Register;
8468 Register Rsrc = $src$$Register;
8469 Label skip;
8470 __ bsrq(Rdst, Rsrc);
8471 __ jccb(Assembler::notZero, skip);
8472 __ movl(Rdst, -1);
8473 __ bind(skip);
8474 __ negl(Rdst);
8475 __ addl(Rdst, BitsPerLong - 1);
8476 %}
8477 ins_pipe(ialu_reg);
8478 %}
8479
8480 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8481 predicate(UseCountTrailingZerosInstruction);
8482 match(Set dst (CountTrailingZerosI src));
8483 effect(KILL cr);
8484
8485 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8486 ins_encode %{
8487 __ tzcntl($dst$$Register, $src$$Register);
8488 %}
8489 ins_pipe(ialu_reg);
8490 %}
8491
8492 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8493 predicate(UseCountTrailingZerosInstruction);
8494 match(Set dst (CountTrailingZerosI (LoadI src)));
8495 effect(KILL cr);
8496 ins_cost(175);
8497 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8498 ins_encode %{
8499 __ tzcntl($dst$$Register, $src$$Address);
8500 %}
8501 ins_pipe(ialu_reg_mem);
8502 %}
8503
8504 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8505 predicate(!UseCountTrailingZerosInstruction);
8506 match(Set dst (CountTrailingZerosI src));
8507 effect(KILL cr);
8508
8509 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8510 "jnz done\n\t"
8511 "movl $dst, 32\n"
8512 "done:" %}
8513 ins_encode %{
8514 Register Rdst = $dst$$Register;
8515 Label done;
8516 __ bsfl(Rdst, $src$$Register);
8517 __ jccb(Assembler::notZero, done);
8518 __ movl(Rdst, BitsPerInt);
8519 __ bind(done);
8520 %}
8521 ins_pipe(ialu_reg);
8522 %}
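
// Unlike bsr, bsf directly returns the trailing-zero count for any nonzero
// input (e.g. src = 0x18 -> bsfl yields 3); only src == 0 needs the fix-up
// that forces the defined result of 32.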
8523
8524 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8525 predicate(UseCountTrailingZerosInstruction);
8526 match(Set dst (CountTrailingZerosL src));
8527 effect(KILL cr);
8528
8529 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8530 ins_encode %{
8531 __ tzcntq($dst$$Register, $src$$Register);
8532 %}
8533 ins_pipe(ialu_reg);
8534 %}
8535
8536 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8537 predicate(UseCountTrailingZerosInstruction);
8538 match(Set dst (CountTrailingZerosL (LoadL src)));
8539 effect(KILL cr);
8540 ins_cost(175);
8541 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8542 ins_encode %{
8543 __ tzcntq($dst$$Register, $src$$Address);
8544 %}
8545 ins_pipe(ialu_reg_mem);
8546 %}
8547
8548 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8549 predicate(!UseCountTrailingZerosInstruction);
8550 match(Set dst (CountTrailingZerosL src));
8551 effect(KILL cr);
8552
8553 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8554 "jnz done\n\t"
8555 "movl $dst, 64\n"
8556 "done:" %}
8557 ins_encode %{
8558 Register Rdst = $dst$$Register;
8559 Label done;
8560 __ bsfq(Rdst, $src$$Register);
8561 __ jccb(Assembler::notZero, done);
8562 __ movl(Rdst, BitsPerLong);
8563 __ bind(done);
8564 %}
8565 ins_pipe(ialu_reg);
8566 %}
8567
8568 //--------------- Reverse Operation Instructions ----------------
8569 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8570 predicate(!VM_Version::supports_gfni());
8571 match(Set dst (ReverseI src));
8572 effect(TEMP dst, TEMP rtmp, KILL cr);
8573 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8574 ins_encode %{
8575 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8576 %}
8577 ins_pipe( ialu_reg );
8578 %}
8579
8580 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8581 predicate(VM_Version::supports_gfni());
8582 match(Set dst (ReverseI src));
8583 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8584 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8585 ins_encode %{
8586 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8587 %}
8588 ins_pipe( ialu_reg );
8589 %}
8590
8591 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8592 predicate(!VM_Version::supports_gfni());
8593 match(Set dst (ReverseL src));
8594 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8595 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8596 ins_encode %{
8597 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8598 %}
8599 ins_pipe( ialu_reg );
8600 %}
8601
8602 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8603 predicate(VM_Version::supports_gfni());
8604 match(Set dst (ReverseL src));
8605 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8606 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8607 ins_encode %{
8608 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8609 %}
8610 ins_pipe( ialu_reg );
8611 %}
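
// A rough sketch of the two strategies above: without GFNI, reverseI/reverseL
// assemble the bit reversal from shift-and-mask steps in the integer temps;
// with GFNI, a single GF2P8AFFINEQB against a bit-reversal matrix (loaded via
// $rtmp) reverses the bits within each byte, and a BSWAP then reverses the
// byte order, roughly:
//
//   movq          xtmp1, src        // operand into an XMM register
//   gf2p8affineqb xtmp1, xtmp2, 0   // xtmp2 = bit-reversal matrix
//   movq          dst, xtmp1
//   bswapq        dst               // reverse byte order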
8612
8613 //---------- Population Count Instructions -------------------------------------
8614
8615 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8616 predicate(UsePopCountInstruction);
8617 match(Set dst (PopCountI src));
8618 effect(KILL cr);
8619
8620 format %{ "popcnt $dst, $src" %}
8621 ins_encode %{
8622 __ popcntl($dst$$Register, $src$$Register);
8623 %}
8624 ins_pipe(ialu_reg);
8625 %}
8626
8627 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8628 predicate(UsePopCountInstruction);
8629 match(Set dst (PopCountI (LoadI mem)));
8630 effect(KILL cr);
8631
8632 format %{ "popcnt $dst, $mem" %}
8633 ins_encode %{
8634 __ popcntl($dst$$Register, $mem$$Address);
8635 %}
8636 ins_pipe(ialu_reg);
8637 %}
8638
8639 // Note: Long.bitCount(long) returns an int.
8640 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8641 predicate(UsePopCountInstruction);
8642 match(Set dst (PopCountL src));
8643 effect(KILL cr);
8644
8645 format %{ "popcnt $dst, $src" %}
8646 ins_encode %{
8647 __ popcntq($dst$$Register, $src$$Register);
8648 %}
8649 ins_pipe(ialu_reg);
8650 %}
8651
8652 // Note: Long.bitCount(long) returns an int.
8653 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8654 predicate(UsePopCountInstruction);
8655 match(Set dst (PopCountL (LoadL mem)));
8656 effect(KILL cr);
8657
8658 format %{ "popcnt $dst, $mem" %}
8659 ins_encode %{
8660 __ popcntq($dst$$Register, $mem$$Address);
8661 %}
8662 ins_pipe(ialu_reg);
8663 %}
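
// The four rules above implement the Integer.bitCount / Long.bitCount
// intrinsics with a single instruction, e.g.
//
//   popcnt rax, rcx    // rax = number of set bits in rcx
//
// When UsePopCountInstruction is off no rule matches and the shift-and-mask
// library implementation runs instead.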
8664
8666 //----------MemBar Instructions-----------------------------------------------
8667 // Memory barrier flavors
8668
8669 instruct membar_acquire()
8670 %{
8671 match(MemBarAcquire);
8672 match(LoadFence);
8673 ins_cost(0);
8674
8675 size(0);
8676 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8677 ins_encode();
8678 ins_pipe(empty);
8679 %}
8680
8681 instruct membar_acquire_lock()
8682 %{
8683 match(MemBarAcquireLock);
8684 ins_cost(0);
8685
8686 size(0);
8687 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8688 ins_encode();
8689 ins_pipe(empty);
8690 %}
8691
8692 instruct membar_release()
8693 %{
8694 match(MemBarRelease);
8695 match(StoreFence);
8696 ins_cost(0);
8697
8698 size(0);
8699 format %{ "MEMBAR-release ! (empty encoding)" %}
8700 ins_encode();
8701 ins_pipe(empty);
8702 %}
8703
8704 instruct membar_release_lock()
8705 %{
8706 match(MemBarReleaseLock);
8707 ins_cost(0);
8708
8709 size(0);
8710 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8711 ins_encode();
8712 ins_pipe(empty);
8713 %}
8714
8715 instruct membar_volatile(rFlagsReg cr) %{
8716 match(MemBarVolatile);
8717 effect(KILL cr);
8718 ins_cost(400);
8719
8720 format %{
8721 $$template
8722 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8723 %}
8724 ins_encode %{
8725 __ membar(Assembler::StoreLoad);
8726 %}
8727 ins_pipe(pipe_slow);
8728 %}
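
// Why "lock addl [rsp + #0], 0" instead of MFENCE: any locked read-modify-write
// is a full StoreLoad barrier on x86, and a locked add of zero to the top of
// the stack is cheaper than MFENCE on most processors while having no visible
// side effect beyond the condition codes (hence KILL cr above).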
8729
8730 instruct unnecessary_membar_volatile()
8731 %{
8732 match(MemBarVolatile);
8733 predicate(Matcher::post_store_load_barrier(n));
8734 ins_cost(0);
8735
8736 size(0);
8737 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8738 ins_encode();
8739 ins_pipe(empty);
8740 %}
8741
8742 instruct membar_storestore() %{
8743 match(MemBarStoreStore);
8744 match(StoreStoreFence);
8745 ins_cost(0);
8746
8747 size(0);
8748 format %{ "MEMBAR-storestore (empty encoding)" %}
8749   ins_encode();
8750 ins_pipe(empty);
8751 %}
8752
8753 //----------Move Instructions--------------------------------------------------
8754
8755 instruct castX2P(rRegP dst, rRegL src)
8756 %{
8757 match(Set dst (CastX2P src));
8758
8759 format %{ "movq $dst, $src\t# long->ptr" %}
8760 ins_encode %{
8761 if ($dst$$reg != $src$$reg) {
8762 __ movptr($dst$$Register, $src$$Register);
8763 }
8764 %}
8765 ins_pipe(ialu_reg_reg); // XXX
8766 %}
8767
8768 instruct castP2X(rRegL dst, rRegP src)
8769 %{
8770 match(Set dst (CastP2X src));
8771
8772 format %{ "movq $dst, $src\t# ptr -> long" %}
8773 ins_encode %{
8774 if ($dst$$reg != $src$$reg) {
8775 __ movptr($dst$$Register, $src$$Register);
8776 }
8777 %}
8778 ins_pipe(ialu_reg_reg); // XXX
8779 %}
8780
8781 // Convert oop into int for vector alignment masking
8782 instruct convP2I(rRegI dst, rRegP src)
8783 %{
8784 match(Set dst (ConvL2I (CastP2X src)));
8785
8786 format %{ "movl $dst, $src\t# ptr -> int" %}
8787 ins_encode %{
8788 __ movl($dst$$Register, $src$$Register);
8789 %}
8790 ins_pipe(ialu_reg_reg); // XXX
8791 %}
8792
8793 // Convert compressed oop into int for vector alignment masking
8794 // in the case of 32-bit oops (heap < 4GB).
8795 instruct convN2I(rRegI dst, rRegN src)
8796 %{
8797 predicate(CompressedOops::shift() == 0);
8798 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8799
8800 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8801 ins_encode %{
8802 __ movl($dst$$Register, $src$$Register);
8803 %}
8804 ins_pipe(ialu_reg_reg); // XXX
8805 %}
8806
8807 // Convert oop pointer into compressed form
8808 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8809 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8810 match(Set dst (EncodeP src));
8811 effect(KILL cr);
8812 format %{ "encode_heap_oop $dst,$src" %}
8813 ins_encode %{
8814 Register s = $src$$Register;
8815 Register d = $dst$$Register;
8816 if (s != d) {
8817 __ movq(d, s);
8818 }
8819 __ encode_heap_oop(d);
8820 %}
8821 ins_pipe(ialu_reg_long);
8822 %}
8823
8824 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8825 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8826 match(Set dst (EncodeP src));
8827 effect(KILL cr);
8828 format %{ "encode_heap_oop_not_null $dst,$src" %}
8829 ins_encode %{
8830 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8831 %}
8832 ins_pipe(ialu_reg_long);
8833 %}
8834
8835 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8836 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8837 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8838 match(Set dst (DecodeN src));
8839 effect(KILL cr);
8840 format %{ "decode_heap_oop $dst,$src" %}
8841 ins_encode %{
8842 Register s = $src$$Register;
8843 Register d = $dst$$Register;
8844 if (s != d) {
8845 __ movq(d, s);
8846 }
8847 __ decode_heap_oop(d);
8848 %}
8849 ins_pipe(ialu_reg_long);
8850 %}
8851
8852 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8853 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8854 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8855 match(Set dst (DecodeN src));
8856 effect(KILL cr);
8857 format %{ "decode_heap_oop_not_null $dst,$src" %}
8858 ins_encode %{
8859 Register s = $src$$Register;
8860 Register d = $dst$$Register;
8861 if (s != d) {
8862 __ decode_heap_oop_not_null(d, s);
8863 } else {
8864 __ decode_heap_oop_not_null(d);
8865 }
8866 %}
8867 ins_pipe(ialu_reg_long);
8868 %}
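
// For reference, the compressed-oop mapping implemented by the four rules
// above is (a sketch; base and shift are fixed at startup from the heap
// layout):
//
//   narrow = (uint32_t)((oop - CompressedOops::base()) >> CompressedOops::shift());
//   oop    = CompressedOops::base() + ((uint64_t)narrow << CompressedOops::shift());
//
// With a small enough heap, base == 0 (and possibly shift == 0), so the
// not-null forms reduce to a shift or a plain move, cf. convN2I above.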
8869
8870 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8871 match(Set dst (EncodePKlass src));
8872 effect(TEMP dst, KILL cr);
8873 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8874 ins_encode %{
8875 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8876 %}
8877 ins_pipe(ialu_reg_long);
8878 %}
8879
8880 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8881 match(Set dst (DecodeNKlass src));
8882 effect(TEMP dst, KILL cr);
8883 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8884 ins_encode %{
8885 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8886 %}
8887 ins_pipe(ialu_reg_long);
8888 %}
8889
8890 //----------Conditional Move---------------------------------------------------
8891 // Jump
8892 // dummy instruction for generating temp registers
8893 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8894 match(Jump (LShiftL switch_val shift));
8895 ins_cost(350);
8896 predicate(false);
8897 effect(TEMP dest);
8898
8899 format %{ "leaq $dest, [$constantaddress]\n\t"
8900 "jmp [$dest + $switch_val << $shift]\n\t" %}
8901 ins_encode %{
8902 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
8903 // to do that and the compiler is using that register as one it can allocate.
8904 // So we build it all by hand.
8905 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
8906 // ArrayAddress dispatch(table, index);
8907 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
8908 __ lea($dest$$Register, $constantaddress);
8909 __ jmp(dispatch);
8910 %}
8911 ins_pipe(pipe_jmp);
8912 %}
8913
8914 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8915 match(Jump (AddL (LShiftL switch_val shift) offset));
8916 ins_cost(350);
8917 effect(TEMP dest);
8918
8919 format %{ "leaq $dest, [$constantaddress]\n\t"
8920 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
8921 ins_encode %{
8922 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
8923 // to do that and the compiler is using that register as one it can allocate.
8924 // So we build it all by hand.
8925 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8926 // ArrayAddress dispatch(table, index);
8927 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8928 __ lea($dest$$Register, $constantaddress);
8929 __ jmp(dispatch);
8930 %}
8931 ins_pipe(pipe_jmp);
8932 %}
8933
8934 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
8935 match(Jump switch_val);
8936 ins_cost(350);
8937 effect(TEMP dest);
8938
8939 format %{ "leaq $dest, [$constantaddress]\n\t"
8940 "jmp [$dest + $switch_val]\n\t" %}
8941 ins_encode %{
8942 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
8943 // to do that and the compiler is using that register as one it can allocate.
8944 // So we build it all by hand.
8945 // Address index(noreg, switch_reg, Address::times_1);
8946 // ArrayAddress dispatch(table, index);
8947 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
8948 __ lea($dest$$Register, $constantaddress);
8949 __ jmp(dispatch);
8950 %}
8951 ins_pipe(pipe_jmp);
8952 %}
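
// All three jumpXtnd forms dispatch through a jump table emitted in the
// constant section: $constantaddress is the table base loaded with LEA, and
// the indirect jmp indexes it with the switch value, optionally scaled and
// displaced, i.e. roughly "jmp [table + $switch_val << $shift + $offset]".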
8953
8954 // Conditional move
8955 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
8956 %{
8957 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
8958 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
8959
8960 ins_cost(100); // XXX
8961 format %{ "setbn$cop $dst\t# signed, int" %}
8962 ins_encode %{
8963 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
8964 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
8965 %}
8966 ins_pipe(ialu_reg);
8967 %}
8968
8969 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
8970 %{
8971 predicate(!UseAPX);
8972 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8973
8974 ins_cost(200); // XXX
8975 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8976 ins_encode %{
8977 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
8978 %}
8979 ins_pipe(pipe_cmov_reg);
8980 %}
8981
8982 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
8983 %{
8984 predicate(UseAPX);
8985 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
8986
8987 ins_cost(200);
8988 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
8989 ins_encode %{
8990 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
8991 %}
8992 ins_pipe(pipe_cmov_reg);
8993 %}
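
// The *_ndd rules use the Intel APX "new data destination" (NDD) encodings:
// the EVEX-promoted instruction writes a separate destination register, so
// the allocator need not force dst == src as the legacy two-operand forms
// require, saving a move when the inputs live elsewhere.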
8994
8995 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
8996 %{
8997 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
8998 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
8999
9000 ins_cost(100); // XXX
9001 format %{ "setbn$cop $dst\t# unsigned, int" %}
9002 ins_encode %{
9003 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9004 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9005 %}
9006 ins_pipe(ialu_reg);
9007 %}
9008
9009 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9010 predicate(!UseAPX);
9011 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9012
9013 ins_cost(200); // XXX
9014 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9015 ins_encode %{
9016 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9017 %}
9018 ins_pipe(pipe_cmov_reg);
9019 %}
9020
9021 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9022 predicate(UseAPX);
9023 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9024
9025 ins_cost(200);
9026 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9027 ins_encode %{
9028 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9029 %}
9030 ins_pipe(pipe_cmov_reg);
9031 %}
9032
9033 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9034 %{
9035 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9036 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9037
9038 ins_cost(100); // XXX
9039 format %{ "setbn$cop $dst\t# unsigned, int" %}
9040 ins_encode %{
9041 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9042 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9043 %}
9044 ins_pipe(ialu_reg);
9045 %}
9046
9047 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9048 predicate(!UseAPX);
9049 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9050 ins_cost(200);
9051 expand %{
9052 cmovI_regU(cop, cr, dst, src);
9053 %}
9054 %}
9055
9056 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9057 predicate(UseAPX);
9058 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9059 ins_cost(200);
9060 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9061 ins_encode %{
9062 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9063 %}
9064 ins_pipe(pipe_cmov_reg);
9065 %}
9066
9067 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9068 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9069 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9070
9071 ins_cost(200); // XXX
9072 format %{ "cmovpl $dst, $src\n\t"
9073 "cmovnel $dst, $src" %}
9074 ins_encode %{
9075 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9076 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9077 %}
9078 ins_pipe(pipe_cmov_reg);
9079 %}
9080
9081 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9082 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9083 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9084 effect(TEMP dst);
9085
9086 ins_cost(200);
9087 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9088 "cmovnel $dst, $src2" %}
9089 ins_encode %{
9090 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9091 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9092 %}
9093 ins_pipe(pipe_cmov_reg);
9094 %}
9095
9096 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9097 // inputs of the CMove
9098 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9099 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9100 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9101 effect(TEMP dst);
9102
9103 ins_cost(200); // XXX
9104 format %{ "cmovpl $dst, $src\n\t"
9105 "cmovnel $dst, $src" %}
9106 ins_encode %{
9107 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9108 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9109 %}
9110 ins_pipe(pipe_cmov_reg);
9111 %}
9112
9113 // We need this special handling only for eq/neq comparisons, since NaN == NaN
9114 // is false and the parity flag is set if either operand is NaN.
9115 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9116 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9117 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9118 effect(TEMP dst);
9119
9120 ins_cost(200);
9121 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9122 "cmovnel $dst, $src2" %}
9123 ins_encode %{
9124 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9125 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9126 %}
9127 ins_pipe(pipe_cmov_reg);
9128 %}
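
// For reference: after UCOMISS/UCOMISD an unordered result (a NaN operand)
// sets ZF = PF = CF = 1, so "equal" is really (ZF && !PF) and "not equal" is
// (!ZF || PF). The cmovp/cmovne pairs above implement the "ne" disjunction
// directly; the "eq" rules reuse the same pair with the CMove inputs swapped.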
9129
9130 // Conditional move
9131 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9132 predicate(!UseAPX);
9133 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9134
9135 ins_cost(250); // XXX
9136 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9137 ins_encode %{
9138 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9139 %}
9140 ins_pipe(pipe_cmov_mem);
9141 %}
9142
9143 // Conditional move
9144 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9145 %{
9146 predicate(UseAPX);
9147 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9148
9149 ins_cost(250);
9150 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9151 ins_encode %{
9152 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9153 %}
9154 ins_pipe(pipe_cmov_mem);
9155 %}
9156
9157 // Conditional move
9158 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9159 %{
9160 predicate(!UseAPX);
9161 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9162
9163 ins_cost(250); // XXX
9164 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9165 ins_encode %{
9166 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9167 %}
9168 ins_pipe(pipe_cmov_mem);
9169 %}
9170
9171 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9172 predicate(!UseAPX);
9173 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9174 ins_cost(250);
9175 expand %{
9176 cmovI_memU(cop, cr, dst, src);
9177 %}
9178 %}
9179
9180 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9181 %{
9182 predicate(UseAPX);
9183 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9184
9185 ins_cost(250);
9186 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9187 ins_encode %{
9188 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9189 %}
9190 ins_pipe(pipe_cmov_mem);
9191 %}
9192
9193 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9194 %{
9195 predicate(UseAPX);
9196 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9197 ins_cost(250);
9198 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9199 ins_encode %{
9200 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9201 %}
9202 ins_pipe(pipe_cmov_mem);
9203 %}
9204
9205 // Conditional move
9206 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9207 %{
9208 predicate(!UseAPX);
9209 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9210
9211 ins_cost(200); // XXX
9212 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9213 ins_encode %{
9214 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9215 %}
9216 ins_pipe(pipe_cmov_reg);
9217 %}
9218
9219 // Conditional move ndd
9220 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9221 %{
9222 predicate(UseAPX);
9223 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9224
9225 ins_cost(200);
9226 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9227 ins_encode %{
9228 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9229 %}
9230 ins_pipe(pipe_cmov_reg);
9231 %}
9232
9233 // Conditional move
9234 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9235 %{
9236 predicate(!UseAPX);
9237 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9238
9239 ins_cost(200); // XXX
9240 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9241 ins_encode %{
9242 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9243 %}
9244 ins_pipe(pipe_cmov_reg);
9245 %}
9246
9247 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9248 predicate(!UseAPX);
9249 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9250 ins_cost(200);
9251 expand %{
9252 cmovN_regU(cop, cr, dst, src);
9253 %}
9254 %}
9255
9256 // Conditional move ndd
9257 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9258 %{
9259 predicate(UseAPX);
9260 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9261
9262 ins_cost(200);
9263 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9264 ins_encode %{
9265 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9266 %}
9267 ins_pipe(pipe_cmov_reg);
9268 %}
9269
9270 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9271 predicate(UseAPX);
9272 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9273 ins_cost(200);
9274 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9275 ins_encode %{
9276 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9277 %}
9278 ins_pipe(pipe_cmov_reg);
9279 %}
9280
9281 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9282 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9283 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9284
9285 ins_cost(200); // XXX
9286 format %{ "cmovpl $dst, $src\n\t"
9287 "cmovnel $dst, $src" %}
9288 ins_encode %{
9289 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9290 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9291 %}
9292 ins_pipe(pipe_cmov_reg);
9293 %}
9294
9295 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9296 // inputs of the CMove
9297 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9298 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9299 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9300
9301 ins_cost(200); // XXX
9302 format %{ "cmovpl $dst, $src\n\t"
9303 "cmovnel $dst, $src" %}
9304 ins_encode %{
9305 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9306 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9307 %}
9308 ins_pipe(pipe_cmov_reg);
9309 %}
9310
9311 // Conditional move
9312 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9313 %{
9314 predicate(!UseAPX);
9315 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9316
9317 ins_cost(200); // XXX
9318 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9319 ins_encode %{
9320 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9321 %}
9322 ins_pipe(pipe_cmov_reg); // XXX
9323 %}
9324
9325 // Conditional move ndd
9326 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9327 %{
9328 predicate(UseAPX);
9329 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9330
9331 ins_cost(200);
9332 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9333 ins_encode %{
9334 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9335 %}
9336 ins_pipe(pipe_cmov_reg);
9337 %}
9338
9339 // Conditional move
9340 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9341 %{
9342 predicate(!UseAPX);
9343 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9344
9345 ins_cost(200); // XXX
9346 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9347 ins_encode %{
9348 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9349 %}
9350 ins_pipe(pipe_cmov_reg); // XXX
9351 %}
9352
9353 // Conditional move ndd
9354 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9355 %{
9356 predicate(UseAPX);
9357 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9358
9359 ins_cost(200);
9360 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9361 ins_encode %{
9362 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9363 %}
9364 ins_pipe(pipe_cmov_reg);
9365 %}
9366
9367 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9368 predicate(!UseAPX);
9369 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9370 ins_cost(200);
9371 expand %{
9372 cmovP_regU(cop, cr, dst, src);
9373 %}
9374 %}
9375
9376 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9377 predicate(UseAPX);
9378 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9379 ins_cost(200);
9380 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9381 ins_encode %{
9382 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9383 %}
9384 ins_pipe(pipe_cmov_reg);
9385 %}
9386
9387 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9388 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9389 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9390
9391 ins_cost(200); // XXX
9392 format %{ "cmovpq $dst, $src\n\t"
9393 "cmovneq $dst, $src" %}
9394 ins_encode %{
9395 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9396 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9397 %}
9398 ins_pipe(pipe_cmov_reg);
9399 %}
9400
9401 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9402 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9403 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9404 effect(TEMP dst);
9405
9406 ins_cost(200);
9407 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9408 "cmovneq $dst, $src2" %}
9409 ins_encode %{
9410 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9411 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9412 %}
9413 ins_pipe(pipe_cmov_reg);
9414 %}
9415
9416 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9417 // inputs of the CMove
9418 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9419 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9420 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9421
9422 ins_cost(200); // XXX
9423 format %{ "cmovpq $dst, $src\n\t"
9424 "cmovneq $dst, $src" %}
9425 ins_encode %{
9426 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9427 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9428 %}
9429 ins_pipe(pipe_cmov_reg);
9430 %}
9431
9432 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9433 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9434 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9435 effect(TEMP dst);
9436
9437 ins_cost(200);
9438 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9439 "cmovneq $dst, $src2" %}
9440 ins_encode %{
9441 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9442 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9443 %}
9444 ins_pipe(pipe_cmov_reg);
9445 %}
9446
9447 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9448 %{
9449 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9450 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9451
9452 ins_cost(100); // XXX
9453 format %{ "setbn$cop $dst\t# signed, long" %}
9454 ins_encode %{
9455 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9456 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9457 %}
9458 ins_pipe(ialu_reg);
9459 %}
9460
9461 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9462 %{
9463 predicate(!UseAPX);
9464 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9465
9466 ins_cost(200); // XXX
9467 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9468 ins_encode %{
9469 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9470 %}
9471 ins_pipe(pipe_cmov_reg); // XXX
9472 %}
9473
9474 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9475 %{
9476 predicate(UseAPX);
9477 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9478
9479 ins_cost(200);
9480 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9481 ins_encode %{
9482 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9483 %}
9484 ins_pipe(pipe_cmov_reg);
9485 %}
9486
9487 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9488 %{
9489 predicate(!UseAPX);
9490 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9491
9492 ins_cost(200); // XXX
9493 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9494 ins_encode %{
9495 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9496 %}
9497 ins_pipe(pipe_cmov_mem); // XXX
9498 %}
9499
9500 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9501 %{
9502 predicate(UseAPX);
9503 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9504
9505 ins_cost(200);
9506 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9507 ins_encode %{
9508 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9509 %}
9510 ins_pipe(pipe_cmov_mem);
9511 %}
9512
9513 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9514 %{
9515 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9516 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9517
9518 ins_cost(100); // XXX
9519 format %{ "setbn$cop $dst\t# unsigned, long" %}
9520 ins_encode %{
9521 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9522 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9523 %}
9524 ins_pipe(ialu_reg);
9525 %}
9526
9527 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9528 %{
9529 predicate(!UseAPX);
9530 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9531
9532 ins_cost(200); // XXX
9533 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9534 ins_encode %{
9535 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9536 %}
9537 ins_pipe(pipe_cmov_reg); // XXX
9538 %}
9539
9540 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9541 %{
9542 predicate(UseAPX);
9543 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9544
9545 ins_cost(200);
9546 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9547 ins_encode %{
9548 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9549 %}
9550 ins_pipe(pipe_cmov_reg);
9551 %}
9552
9553 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9554 %{
9555 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9556 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9557
9558 ins_cost(100); // XXX
9559 format %{ "setbn$cop $dst\t# unsigned, long" %}
9560 ins_encode %{
9561 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9562 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9563 %}
9564 ins_pipe(ialu_reg);
9565 %}
9566
9567 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9568 predicate(!UseAPX);
9569 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9570 ins_cost(200);
9571 expand %{
9572 cmovL_regU(cop, cr, dst, src);
9573 %}
9574 %}
9575
9576 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9577 %{
9578 predicate(UseAPX);
9579 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9580 ins_cost(200);
9581 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9582 ins_encode %{
9583 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9584 %}
9585 ins_pipe(pipe_cmov_reg);
9586 %}
9587
9588 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9589 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9590 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9591
9592 ins_cost(200); // XXX
9593 format %{ "cmovpq $dst, $src\n\t"
9594 "cmovneq $dst, $src" %}
9595 ins_encode %{
9596 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9597 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9598 %}
9599 ins_pipe(pipe_cmov_reg);
9600 %}
9601
9602 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9603 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9604 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9605 effect(TEMP dst);
9606
9607 ins_cost(200);
9608 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9609 "cmovneq $dst, $src2" %}
9610 ins_encode %{
9611 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9612 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9613 %}
9614 ins_pipe(pipe_cmov_reg);
9615 %}
9616
9617 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9618 // inputs of the CMove
9619 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9620 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9621 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9622
9623 ins_cost(200); // XXX
9624 format %{ "cmovpq $dst, $src\n\t"
9625 "cmovneq $dst, $src" %}
9626 ins_encode %{
9627 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9628 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9629 %}
9630 ins_pipe(pipe_cmov_reg);
9631 %}
9632
9633 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9634 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9636 effect(TEMP dst);
9637
9638 ins_cost(200);
9639 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9640 "cmovneq $dst, $src2" %}
9641 ins_encode %{
9642 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9643 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9644 %}
9645 ins_pipe(pipe_cmov_reg);
9646 %}
9647
9648 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9649 %{
9650 predicate(!UseAPX);
9651 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9652
9653 ins_cost(200); // XXX
9654 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9655 ins_encode %{
9656 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9657 %}
9658 ins_pipe(pipe_cmov_mem); // XXX
9659 %}
9660
9661 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9662 predicate(!UseAPX);
9663 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9664 ins_cost(200);
9665 expand %{
9666 cmovL_memU(cop, cr, dst, src);
9667 %}
9668 %}
9669
9670 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9671 %{
9672 predicate(UseAPX);
9673 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9674
9675 ins_cost(200);
9676 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9677 ins_encode %{
9678 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9679 %}
9680 ins_pipe(pipe_cmov_mem);
9681 %}
9682
9683 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9684 %{
9685 predicate(UseAPX);
9686 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9687 ins_cost(200);
9688 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9689 ins_encode %{
9690 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9691 %}
9692 ins_pipe(pipe_cmov_mem);
9693 %}
9694
9695 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9696 %{
9697 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9698
9699 ins_cost(200); // XXX
9700 format %{ "jn$cop skip\t# signed cmove float\n\t"
9701 "movss $dst, $src\n"
9702 "skip:" %}
9703 ins_encode %{
9704 Label Lskip;
9705 // Invert sense of branch from sense of CMOV
9706 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9707 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9708 __ bind(Lskip);
9709 %}
9710 ins_pipe(pipe_slow);
9711 %}
9712
9713 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9714 %{
9715 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9716
9717 ins_cost(200); // XXX
9718 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9719 "movss $dst, $src\n"
9720 "skip:" %}
9721 ins_encode %{
9722 Label Lskip;
9723 // Invert sense of branch from sense of CMOV
9724 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9725 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9726 __ bind(Lskip);
9727 %}
9728 ins_pipe(pipe_slow);
9729 %}
9730
9731 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9732 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9733 ins_cost(200);
9734 expand %{
9735 cmovF_regU(cop, cr, dst, src);
9736 %}
9737 %}
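
// There is no CMOV for XMM registers, so the float rules above and the double
// rules below branch around an unconditional MOVSS/MOVSD instead, inverting
// the branch sense relative to the CMove condition.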
9738
9739 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9740 %{
9741 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9742
9743 ins_cost(200); // XXX
9744 format %{ "jn$cop skip\t# signed cmove double\n\t"
9745 "movsd $dst, $src\n"
9746 "skip:" %}
9747 ins_encode %{
9748 Label Lskip;
9749 // Invert sense of branch from sense of CMOV
9750 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9751 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9752 __ bind(Lskip);
9753 %}
9754 ins_pipe(pipe_slow);
9755 %}
9756
9757 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9758 %{
9759 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9760
9761 ins_cost(200); // XXX
9762 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9763 "movsd $dst, $src\n"
9764 "skip:" %}
9765 ins_encode %{
9766 Label Lskip;
9767 // Invert sense of branch from sense of CMOV
9768 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9769 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9770 __ bind(Lskip);
9771 %}
9772 ins_pipe(pipe_slow);
9773 %}
9774
9775 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9776 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9777 ins_cost(200);
9778 expand %{
9779 cmovD_regU(cop, cr, dst, src);
9780 %}
9781 %}
9782
9783 //----------Arithmetic Instructions--------------------------------------------
9784 //----------Addition Instructions----------------------------------------------
9785
9786 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9787 %{
9788 predicate(!UseAPX);
9789 match(Set dst (AddI dst src));
9790 effect(KILL cr);
9791 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9792 format %{ "addl $dst, $src\t# int" %}
9793 ins_encode %{
9794 __ addl($dst$$Register, $src$$Register);
9795 %}
9796 ins_pipe(ialu_reg_reg);
9797 %}
9798
9799 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9800 %{
9801 predicate(UseAPX);
9802 match(Set dst (AddI src1 src2));
9803 effect(KILL cr);
9804 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9805
9806 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9807 ins_encode %{
9808 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9809 %}
9810 ins_pipe(ialu_reg_reg);
9811 %}
9812
9813 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9814 %{
9815 predicate(!UseAPX);
9816 match(Set dst (AddI dst src));
9817 effect(KILL cr);
9818 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9819
9820 format %{ "addl $dst, $src\t# int" %}
9821 ins_encode %{
9822 __ addl($dst$$Register, $src$$constant);
9823 %}
9824 ins_pipe( ialu_reg );
9825 %}
9826
9827 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9828 %{
9829 predicate(UseAPX);
9830 match(Set dst (AddI src1 src2));
9831 effect(KILL cr);
9832 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9833
9834 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9835 ins_encode %{
9836 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9837 %}
9838 ins_pipe( ialu_reg );
9839 %}
9840
9841 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9842 %{
9843 predicate(UseAPX);
9844 match(Set dst (AddI (LoadI src1) src2));
9845 effect(KILL cr);
9846 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9847
9848 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9849 ins_encode %{
9850 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9851 %}
9852 ins_pipe( ialu_reg );
9853 %}
9854
9855 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9856 %{
9857 predicate(!UseAPX);
9858 match(Set dst (AddI dst (LoadI src)));
9859 effect(KILL cr);
9860 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9861
9862 ins_cost(150); // XXX
9863 format %{ "addl $dst, $src\t# int" %}
9864 ins_encode %{
9865 __ addl($dst$$Register, $src$$Address);
9866 %}
9867 ins_pipe(ialu_reg_mem);
9868 %}
9869
9870 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9871 %{
9872 predicate(UseAPX);
9873 match(Set dst (AddI src1 (LoadI src2)));
9874 effect(KILL cr);
9875 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9876
9877 ins_cost(150);
9878 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9879 ins_encode %{
9880 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9881 %}
9882 ins_pipe(ialu_reg_mem);
9883 %}
9884
9885 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9886 %{
9887 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9888 effect(KILL cr);
9889 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9890
9891 ins_cost(150); // XXX
9892 format %{ "addl $dst, $src\t# int" %}
9893 ins_encode %{
9894 __ addl($dst$$Address, $src$$Register);
9895 %}
9896 ins_pipe(ialu_mem_reg);
9897 %}
9898
9899 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9900 %{
9901 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9902 effect(KILL cr);
9903 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9904
9906 ins_cost(125); // XXX
9907 format %{ "addl $dst, $src\t# int" %}
9908 ins_encode %{
9909 __ addl($dst$$Address, $src$$constant);
9910 %}
9911 ins_pipe(ialu_mem_imm);
9912 %}
9913
9914 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
9915 %{
9916 predicate(!UseAPX && UseIncDec);
9917 match(Set dst (AddI dst src));
9918 effect(KILL cr);
9919
9920 format %{ "incl $dst\t# int" %}
9921 ins_encode %{
9922 __ incrementl($dst$$Register);
9923 %}
9924 ins_pipe(ialu_reg);
9925 %}
9926
9927 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
9928 %{
9929 predicate(UseAPX && UseIncDec);
9930 match(Set dst (AddI src val));
9931 effect(KILL cr);
9932
9933 format %{ "eincl $dst, $src\t# int ndd" %}
9934 ins_encode %{
9935 __ eincl($dst$$Register, $src$$Register, false);
9936 %}
9937 ins_pipe(ialu_reg);
9938 %}
9939
9940 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
9941 %{
9942 predicate(UseAPX && UseIncDec);
9943 match(Set dst (AddI (LoadI src) val));
9944 effect(KILL cr);
9945
9946 format %{ "eincl $dst, $src\t# int ndd" %}
9947 ins_encode %{
9948 __ eincl($dst$$Register, $src$$Address, false);
9949 %}
9950 ins_pipe(ialu_reg);
9951 %}
9952
9953 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
9954 %{
9955 predicate(UseIncDec);
9956 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9957 effect(KILL cr);
9958
9959 ins_cost(125); // XXX
9960 format %{ "incl $dst\t# int" %}
9961 ins_encode %{
9962 __ incrementl($dst$$Address);
9963 %}
9964 ins_pipe(ialu_mem_imm);
9965 %}
9966
9967 // XXX why does that use AddI
9968 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
9969 %{
9970 predicate(!UseAPX && UseIncDec);
9971 match(Set dst (AddI dst src));
9972 effect(KILL cr);
9973
9974 format %{ "decl $dst\t# int" %}
9975 ins_encode %{
9976 __ decrementl($dst$$Register);
9977 %}
9978 ins_pipe(ialu_reg);
9979 %}
9980
9981 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
9982 %{
9983 predicate(UseAPX && UseIncDec);
9984 match(Set dst (AddI src val));
9985 effect(KILL cr);
9986
9987 format %{ "edecl $dst, $src\t# int ndd" %}
9988 ins_encode %{
9989 __ edecl($dst$$Register, $src$$Register, false);
9990 %}
9991 ins_pipe(ialu_reg);
9992 %}
9993
9994 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
9995 %{
9996 predicate(UseAPX && UseIncDec);
9997 match(Set dst (AddI (LoadI src) val));
9998 effect(KILL cr);
9999
10000 format %{ "edecl $dst, $src\t# int ndd" %}
10001 ins_encode %{
10002 __ edecl($dst$$Register, $src$$Address, false);
10003 %}
10004 ins_pipe(ialu_reg);
10005 %}
10006
10007 // XXX why does that use AddI
10008 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10009 %{
10010 predicate(UseIncDec);
10011 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10012 effect(KILL cr);
10013
10014 ins_cost(125); // XXX
10015 format %{ "decl $dst\t# int" %}
10016 ins_encode %{
10017 __ decrementl($dst$$Address);
10018 %}
10019 ins_pipe(ialu_mem_imm);
10020 %}
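
// Re the "XXX why does that use AddI/AddL" notes: C2 canonicalizes subtraction
// of a constant into addition of its negation (see SubINode::Ideal), so by the
// time matching runs a decrement is an AddI/AddL with a -1 immediate; there is
// no separate SubI-with-immediate shape to match.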
10021
10022 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10023 %{
10024 predicate(VM_Version::supports_fast_2op_lea());
10025 match(Set dst (AddI (LShiftI index scale) disp));
10026
10027 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10028 ins_encode %{
10029 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10030 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10031 %}
10032 ins_pipe(ialu_reg_reg);
10033 %}
10034
10035 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10036 %{
10037 predicate(VM_Version::supports_fast_3op_lea());
10038 match(Set dst (AddI (AddI base index) disp));
10039
10040 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10041 ins_encode %{
10042 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10043 %}
10044 ins_pipe(ialu_reg_reg);
10045 %}
10046
10047 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10048 %{
10049 predicate(VM_Version::supports_fast_2op_lea());
10050 match(Set dst (AddI base (LShiftI index scale)));
10051
10052 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10053 ins_encode %{
10054 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10055 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10056 %}
10057 ins_pipe(ialu_reg_reg);
10058 %}
10059
10060 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10061 %{
10062 predicate(VM_Version::supports_fast_3op_lea());
10063 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10064
10065 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10066 ins_encode %{
10067 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10068 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10069 %}
10070 ins_pipe(ialu_reg_reg);
10071 %}
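
// The LEA rules fold shift-and-add arithmetic into one flag-preserving
// instruction on CPUs that compute such LEAs quickly, e.g.
//
//   leal eax, [rbx + rcx << 2 + 16]    // eax = rbx + (rcx << 2) + 16
//
// The no_rbp_r13 operand in the two-operand form is an encoding restriction:
// RBP/R13 as a base cannot be encoded without a displacement byte, which would
// turn the fast two-component LEA into a slower three-component one.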
10072
10073 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10074 %{
10075 predicate(!UseAPX);
10076 match(Set dst (AddL dst src));
10077 effect(KILL cr);
10078 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10079
10080 format %{ "addq $dst, $src\t# long" %}
10081 ins_encode %{
10082 __ addq($dst$$Register, $src$$Register);
10083 %}
10084 ins_pipe(ialu_reg_reg);
10085 %}
10086
10087 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10088 %{
10089 predicate(UseAPX);
10090 match(Set dst (AddL src1 src2));
10091 effect(KILL cr);
10092 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10093
10094 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10095 ins_encode %{
10096 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10097 %}
10098 ins_pipe(ialu_reg_reg);
10099 %}
10100
10101 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10102 %{
10103 predicate(!UseAPX);
10104 match(Set dst (AddL dst src));
10105 effect(KILL cr);
10106 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10107
10108 format %{ "addq $dst, $src\t# long" %}
10109 ins_encode %{
10110 __ addq($dst$$Register, $src$$constant);
10111 %}
10112 ins_pipe( ialu_reg );
10113 %}
10114
10115 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10116 %{
10117 predicate(UseAPX);
10118 match(Set dst (AddL src1 src2));
10119 effect(KILL cr);
10120 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10121
10122 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10123 ins_encode %{
10124 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10125 %}
10126 ins_pipe( ialu_reg );
10127 %}
10128
10129 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10130 %{
10131 predicate(UseAPX);
10132 match(Set dst (AddL (LoadL src1) src2));
10133 effect(KILL cr);
10134 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10135
10136 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10137 ins_encode %{
10138 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10139 %}
10140 ins_pipe( ialu_reg );
10141 %}
10142
10143 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10144 %{
10145 predicate(!UseAPX);
10146 match(Set dst (AddL dst (LoadL src)));
10147 effect(KILL cr);
10148 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10149
10150 ins_cost(150); // XXX
10151 format %{ "addq $dst, $src\t# long" %}
10152 ins_encode %{
10153 __ addq($dst$$Register, $src$$Address);
10154 %}
10155 ins_pipe(ialu_reg_mem);
10156 %}
10157
10158 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10159 %{
10160 predicate(UseAPX);
10161 match(Set dst (AddL src1 (LoadL src2)));
10162 effect(KILL cr);
10163 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10164
10165 ins_cost(150);
10166 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10167 ins_encode %{
10168 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10169 %}
10170 ins_pipe(ialu_reg_mem);
10171 %}
10172
10173 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10174 %{
10175 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10176 effect(KILL cr);
10177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10178
10179 ins_cost(150); // XXX
10180 format %{ "addq $dst, $src\t# long" %}
10181 ins_encode %{
10182 __ addq($dst$$Address, $src$$Register);
10183 %}
10184 ins_pipe(ialu_mem_reg);
10185 %}
10186
10187 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10188 %{
10189 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10190 effect(KILL cr);
10191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10192
10193 ins_cost(125); // XXX
10194 format %{ "addq $dst, $src\t# long" %}
10195 ins_encode %{
10196 __ addq($dst$$Address, $src$$constant);
10197 %}
10198 ins_pipe(ialu_mem_imm);
10199 %}
10200
10201 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10202 %{
10203 predicate(!UseAPX && UseIncDec);
10204 match(Set dst (AddL dst src));
10205 effect(KILL cr);
10206
10207 format %{ "incq $dst\t# long" %}
10208 ins_encode %{
10209 __ incrementq($dst$$Register);
10210 %}
10211 ins_pipe(ialu_reg);
10212 %}
10213
10214 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10215 %{
10216 predicate(UseAPX && UseIncDec);
10217 match(Set dst (AddL src val));
10218 effect(KILL cr);
10219
10220 format %{ "eincq $dst, $src\t# long ndd" %}
10221 ins_encode %{
10222 __ eincq($dst$$Register, $src$$Register, false);
10223 %}
10224 ins_pipe(ialu_reg);
10225 %}
10226
10227 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10228 %{
10229 predicate(UseAPX && UseIncDec);
10230 match(Set dst (AddL (LoadL src) val));
10231 effect(KILL cr);
10232
10233 format %{ "eincq $dst, $src\t# long ndd" %}
10234 ins_encode %{
10235 __ eincq($dst$$Register, $src$$Address, false);
10236 %}
10237 ins_pipe(ialu_reg);
10238 %}
10239
10240 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10241 %{
10242 predicate(UseIncDec);
10243 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10244 effect(KILL cr);
10245
10246 ins_cost(125); // XXX
10247 format %{ "incq $dst\t# long" %}
10248 ins_encode %{
10249 __ incrementq($dst$$Address);
10250 %}
10251 ins_pipe(ialu_mem_imm);
10252 %}
10253
10254 // XXX why does that use AddL
10255 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10256 %{
10257 predicate(!UseAPX && UseIncDec);
10258 match(Set dst (AddL dst src));
10259 effect(KILL cr);
10260
10261 format %{ "decq $dst\t# long" %}
10262 ins_encode %{
10263 __ decrementq($dst$$Register);
10264 %}
10265 ins_pipe(ialu_reg);
10266 %}
10267
10268 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10269 %{
10270 predicate(UseAPX && UseIncDec);
10271 match(Set dst (AddL src val));
10272 effect(KILL cr);
10273
10274 format %{ "edecq $dst, $src\t# long ndd" %}
10275 ins_encode %{
10276 __ edecq($dst$$Register, $src$$Register, false);
10277 %}
10278 ins_pipe(ialu_reg);
10279 %}
10280
10281 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10282 %{
10283 predicate(UseAPX && UseIncDec);
10284 match(Set dst (AddL (LoadL src) val));
10285 effect(KILL cr);
10286
10287 format %{ "edecq $dst, $src\t# long ndd" %}
10288 ins_encode %{
10289 __ edecq($dst$$Register, $src$$Address, false);
10290 %}
10291 ins_pipe(ialu_reg);
10292 %}
10293
10294 // XXX why does that use AddL
10295 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10296 %{
10297 predicate(UseIncDec);
10298 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10299 effect(KILL cr);
10300
10301 ins_cost(125); // XXX
10302 format %{ "decq $dst\t# long" %}
10303 ins_encode %{
10304 __ decrementq($dst$$Address);
10305 %}
10306 ins_pipe(ialu_mem_imm);
10307 %}
10308
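// LEA-based additions fold shift/add arithmetic into a single leaq using the
// addressing-mode hardware. The supports_fast_2op_lea()/
// supports_fast_3op_lea() predicates keep these forms off microarchitectures
// where a two- or three-component LEA is slower than the equivalent
// add/shift sequence.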
10309 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10310 %{
10311 predicate(VM_Version::supports_fast_2op_lea());
10312 match(Set dst (AddL (LShiftL index scale) disp));
10313
10314 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10315 ins_encode %{
10316 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10317 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10318 %}
10319 ins_pipe(ialu_reg_reg);
10320 %}
10321
10322 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10323 %{
10324 predicate(VM_Version::supports_fast_3op_lea());
10325 match(Set dst (AddL (AddL base index) disp));
10326
10327 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10328 ins_encode %{
10329 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10330 %}
10331 ins_pipe(ialu_reg_reg);
10332 %}
10333
10334 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10335 %{
10336 predicate(VM_Version::supports_fast_2op_lea());
10337 match(Set dst (AddL base (LShiftL index scale)));
10338
10339 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10340 ins_encode %{
10341 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10342 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10343 %}
10344 ins_pipe(ialu_reg_reg);
10345 %}
10346
10347 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10348 %{
10349 predicate(VM_Version::supports_fast_3op_lea());
10350 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10351
10352 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10353 ins_encode %{
10354 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10355 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10356 %}
10357 ins_pipe(ialu_reg_reg);
10358 %}
10359
10360 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10361 %{
10362 match(Set dst (AddP dst src));
10363 effect(KILL cr);
10364 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10365
10366 format %{ "addq $dst, $src\t# ptr" %}
10367 ins_encode %{
10368 __ addq($dst$$Register, $src$$Register);
10369 %}
10370 ins_pipe(ialu_reg_reg);
10371 %}
10372
10373 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10374 %{
10375 match(Set dst (AddP dst src));
10376 effect(KILL cr);
10377 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10378
10379 format %{ "addq $dst, $src\t# ptr" %}
10380 ins_encode %{
10381 __ addq($dst$$Register, $src$$constant);
10382 %}
10383 ins_pipe( ialu_reg );
10384 %}
10385
10386 // XXX addP mem ops ????
10387
10388 instruct checkCastPP(rRegP dst)
10389 %{
10390 match(Set dst (CheckCastPP dst));
10391
10392 size(0);
10393 format %{ "# checkcastPP of $dst" %}
10394 ins_encode(/* empty encoding */);
10395 ins_pipe(empty);
10396 %}
10397
10398 instruct castPP(rRegP dst)
10399 %{
10400 match(Set dst (CastPP dst));
10401
10402 size(0);
10403 format %{ "# castPP of $dst" %}
10404 ins_encode(/* empty encoding */);
10405 ins_pipe(empty);
10406 %}
10407
10408 instruct castII(rRegI dst)
10409 %{
10410 predicate(VerifyConstraintCasts == 0);
10411 match(Set dst (CastII dst));
10412
10413 size(0);
10414 format %{ "# castII of $dst" %}
10415 ins_encode(/* empty encoding */);
10416 ins_cost(0);
10417 ins_pipe(empty);
10418 %}
10419
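// When VerifyConstraintCasts is nonzero, the otherwise zero-size cast nodes
// emit runtime checks that the value actually lies within the node's
// inferred type range, catching range-analysis bugs at the cost of extra
// code.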
10420 instruct castII_checked(rRegI dst, rFlagsReg cr)
10421 %{
10422 predicate(VerifyConstraintCasts > 0);
10423 match(Set dst (CastII dst));
10424
10425 effect(KILL cr);
10426 format %{ "# cast_checked_II $dst" %}
10427 ins_encode %{
10428 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10429 %}
10430 ins_pipe(pipe_slow);
10431 %}
10432
10433 instruct castLL(rRegL dst)
10434 %{
10435 predicate(VerifyConstraintCasts == 0);
10436 match(Set dst (CastLL dst));
10437
10438 size(0);
10439 format %{ "# castLL of $dst" %}
10440 ins_encode(/* empty encoding */);
10441 ins_cost(0);
10442 ins_pipe(empty);
10443 %}
10444
10445 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10446 %{
10447 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10448 match(Set dst (CastLL dst));
10449
10450 effect(KILL cr);
10451 format %{ "# cast_checked_LL $dst" %}
10452 ins_encode %{
10453 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10454 %}
10455 ins_pipe(pipe_slow);
10456 %}
10457
10458 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10459 %{
10460 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10461 match(Set dst (CastLL dst));
10462
10463 effect(KILL cr, TEMP tmp);
10464 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10465 ins_encode %{
10466 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10467 %}
10468 ins_pipe(pipe_slow);
10469 %}
10470
10471 instruct castFF(regF dst)
10472 %{
10473 match(Set dst (CastFF dst));
10474
10475 size(0);
10476 format %{ "# castFF of $dst" %}
10477 ins_encode(/* empty encoding */);
10478 ins_cost(0);
10479 ins_pipe(empty);
10480 %}
10481
10482 instruct castHH(regF dst)
10483 %{
10484 match(Set dst (CastHH dst));
10485
10486 size(0);
10487 format %{ "# castHH of $dst" %}
10488 ins_encode(/* empty encoding */);
10489 ins_cost(0);
10490 ins_pipe(empty);
10491 %}
10492
10493 instruct castDD(regD dst)
10494 %{
10495 match(Set dst (CastDD dst));
10496
10497 size(0);
10498 format %{ "# castDD of $dst" %}
10499 ins_encode(/* empty encoding */);
10500 ins_cost(0);
10501 ins_pipe(empty);
10502 %}
10503
10504 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
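// CompareAndSwap lowering: LOCK CMPXCHG compares rax (oldval) with the
// memory operand; on a match it stores newval and sets ZF, otherwise it
// loads the observed value into rax (hence KILL oldval) and clears ZF.
// setcc then materializes ZF as the 0/1 int result.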
10505 instruct compareAndSwapP(rRegI res,
10506 memory mem_ptr,
10507 rax_RegP oldval, rRegP newval,
10508 rFlagsReg cr)
10509 %{
10510 predicate(n->as_LoadStore()->barrier_data() == 0);
10511 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10512 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10513 effect(KILL cr, KILL oldval);
10514
10515 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10516 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10517 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10518 ins_encode %{
10519 __ lock();
10520 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10521 __ setcc(Assembler::equal, $res$$Register);
10522 %}
10523 ins_pipe( pipe_cmpxchg );
10524 %}
10525
10526 instruct compareAndSwapL(rRegI res,
10527 memory mem_ptr,
10528 rax_RegL oldval, rRegL newval,
10529 rFlagsReg cr)
10530 %{
10531 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10532 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10533 effect(KILL cr, KILL oldval);
10534
10535 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10536 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10537 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10538 ins_encode %{
10539 __ lock();
10540 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10541 __ setcc(Assembler::equal, $res$$Register);
10542 %}
10543 ins_pipe( pipe_cmpxchg );
10544 %}
10545
10546 instruct compareAndSwapI(rRegI res,
10547 memory mem_ptr,
10548 rax_RegI oldval, rRegI newval,
10549 rFlagsReg cr)
10550 %{
10551 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10552 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10553 effect(KILL cr, KILL oldval);
10554
10555 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10556 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10557 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10558 ins_encode %{
10559 __ lock();
10560 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10561 __ setcc(Assembler::equal, $res$$Register);
10562 %}
10563 ins_pipe( pipe_cmpxchg );
10564 %}
10565
10566 instruct compareAndSwapB(rRegI res,
10567 memory mem_ptr,
10568 rax_RegI oldval, rRegI newval,
10569 rFlagsReg cr)
10570 %{
10571 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10572 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10573 effect(KILL cr, KILL oldval);
10574
10575 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10576 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10577 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10578 ins_encode %{
10579 __ lock();
10580 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10581 __ setcc(Assembler::equal, $res$$Register);
10582 %}
10583 ins_pipe( pipe_cmpxchg );
10584 %}
10585
10586 instruct compareAndSwapS(rRegI res,
10587 memory mem_ptr,
10588 rax_RegI oldval, rRegI newval,
10589 rFlagsReg cr)
10590 %{
10591 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10592 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10593 effect(KILL cr, KILL oldval);
10594
10595 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10596 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10597 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10598 ins_encode %{
10599 __ lock();
10600 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10601 __ setcc(Assembler::equal, $res$$Register);
10602 %}
10603 ins_pipe( pipe_cmpxchg );
10604 %}
10605
10606 instruct compareAndSwapN(rRegI res,
10607 memory mem_ptr,
10608 rax_RegN oldval, rRegN newval,
10609 rFlagsReg cr) %{
10610 predicate(n->as_LoadStore()->barrier_data() == 0);
10611 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10612 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10613 effect(KILL cr, KILL oldval);
10614
10615 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10616 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10617 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10618 ins_encode %{
10619 __ lock();
10620 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10621 __ setcc(Assembler::equal, $res$$Register);
10622 %}
10623 ins_pipe( pipe_cmpxchg );
10624 %}
10625
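// The CompareAndExchange variants return the witnessed value rather than a
// boolean: LOCK CMPXCHG already leaves that value in rax, so the node's
// result is oldval itself and no setcc is needed.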
10626 instruct compareAndExchangeB(
10627 memory mem_ptr,
10628 rax_RegI oldval, rRegI newval,
10629 rFlagsReg cr)
10630 %{
10631 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10632 effect(KILL cr);
10633
10634 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10635 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10636 ins_encode %{
10637 __ lock();
10638 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10639 %}
10640 ins_pipe( pipe_cmpxchg );
10641 %}
10642
10643 instruct compareAndExchangeS(
10644 memory mem_ptr,
10645 rax_RegI oldval, rRegI newval,
10646 rFlagsReg cr)
10647 %{
10648 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10649 effect(KILL cr);
10650
10651 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10652 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10653 ins_encode %{
10654 __ lock();
10655 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10656 %}
10657 ins_pipe( pipe_cmpxchg );
10658 %}
10659
10660 instruct compareAndExchangeI(
10661 memory mem_ptr,
10662 rax_RegI oldval, rRegI newval,
10663 rFlagsReg cr)
10664 %{
10665 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10666 effect(KILL cr);
10667
10668 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10669 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10670 ins_encode %{
10671 __ lock();
10672 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10673 %}
10674 ins_pipe( pipe_cmpxchg );
10675 %}
10676
10677 instruct compareAndExchangeL(
10678 memory mem_ptr,
10679 rax_RegL oldval, rRegL newval,
10680 rFlagsReg cr)
10681 %{
10682 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10683 effect(KILL cr);
10684
10685 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10686 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10687 ins_encode %{
10688 __ lock();
10689 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10690 %}
10691 ins_pipe( pipe_cmpxchg );
10692 %}
10693
10694 instruct compareAndExchangeN(
10695 memory mem_ptr,
10696 rax_RegN oldval, rRegN newval,
10697 rFlagsReg cr) %{
10698 predicate(n->as_LoadStore()->barrier_data() == 0);
10699 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10700 effect(KILL cr);
10701
10702 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10703 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10704 ins_encode %{
10705 __ lock();
10706 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10707 %}
10708 ins_pipe( pipe_cmpxchg );
10709 %}
10710
10711 instruct compareAndExchangeP(
10712 memory mem_ptr,
10713 rax_RegP oldval, rRegP newval,
10714 rFlagsReg cr)
10715 %{
10716 predicate(n->as_LoadStore()->barrier_data() == 0);
10717 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10718 effect(KILL cr);
10719
10720 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10721 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10722 ins_encode %{
10723 __ lock();
10724 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10725 %}
10726 ins_pipe( pipe_cmpxchg );
10727 %}
10728
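// GetAndAdd lowering: when the fetched value is unused (result_not_used()),
// a plain LOCK ADD suffices and avoids XADD's extra register writeback; the
// Universe dummy operand absorbs the discarded result. Otherwise LOCK XADD
// returns the old value in the register that supplied the addend.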
10729 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10730 predicate(n->as_LoadStore()->result_not_used());
10731 match(Set dummy (GetAndAddB mem add));
10732 effect(KILL cr);
10733 format %{ "addb_lock $mem, $add" %}
10734 ins_encode %{
10735 __ lock();
10736 __ addb($mem$$Address, $add$$Register);
10737 %}
10738 ins_pipe(pipe_cmpxchg);
10739 %}
10740
10741 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10742 predicate(n->as_LoadStore()->result_not_used());
10743 match(Set dummy (GetAndAddB mem add));
10744 effect(KILL cr);
10745 format %{ "addb_lock $mem, $add" %}
10746 ins_encode %{
10747 __ lock();
10748 __ addb($mem$$Address, $add$$constant);
10749 %}
10750 ins_pipe(pipe_cmpxchg);
10751 %}
10752
10753 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10754 predicate(!n->as_LoadStore()->result_not_used());
10755 match(Set newval (GetAndAddB mem newval));
10756 effect(KILL cr);
10757 format %{ "xaddb_lock $mem, $newval" %}
10758 ins_encode %{
10759 __ lock();
10760 __ xaddb($mem$$Address, $newval$$Register);
10761 %}
10762 ins_pipe(pipe_cmpxchg);
10763 %}
10764
10765 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10766 predicate(n->as_LoadStore()->result_not_used());
10767 match(Set dummy (GetAndAddS mem add));
10768 effect(KILL cr);
10769 format %{ "addw_lock $mem, $add" %}
10770 ins_encode %{
10771 __ lock();
10772 __ addw($mem$$Address, $add$$Register);
10773 %}
10774 ins_pipe(pipe_cmpxchg);
10775 %}
10776
10777 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10778 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10779 match(Set dummy (GetAndAddS mem add));
10780 effect(KILL cr);
10781 format %{ "addw_lock $mem, $add" %}
10782 ins_encode %{
10783 __ lock();
10784 __ addw($mem$$Address, $add$$constant);
10785 %}
10786 ins_pipe(pipe_cmpxchg);
10787 %}
10788
10789 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10790 predicate(!n->as_LoadStore()->result_not_used());
10791 match(Set newval (GetAndAddS mem newval));
10792 effect(KILL cr);
10793 format %{ "xaddw_lock $mem, $newval" %}
10794 ins_encode %{
10795 __ lock();
10796 __ xaddw($mem$$Address, $newval$$Register);
10797 %}
10798 ins_pipe(pipe_cmpxchg);
10799 %}
10800
10801 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10802 predicate(n->as_LoadStore()->result_not_used());
10803 match(Set dummy (GetAndAddI mem add));
10804 effect(KILL cr);
10805 format %{ "addl_lock $mem, $add" %}
10806 ins_encode %{
10807 __ lock();
10808 __ addl($mem$$Address, $add$$Register);
10809 %}
10810 ins_pipe(pipe_cmpxchg);
10811 %}
10812
10813 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10814 predicate(n->as_LoadStore()->result_not_used());
10815 match(Set dummy (GetAndAddI mem add));
10816 effect(KILL cr);
10817 format %{ "addl_lock $mem, $add" %}
10818 ins_encode %{
10819 __ lock();
10820 __ addl($mem$$Address, $add$$constant);
10821 %}
10822 ins_pipe(pipe_cmpxchg);
10823 %}
10824
10825 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10826 predicate(!n->as_LoadStore()->result_not_used());
10827 match(Set newval (GetAndAddI mem newval));
10828 effect(KILL cr);
10829 format %{ "xaddl_lock $mem, $newval" %}
10830 ins_encode %{
10831 __ lock();
10832 __ xaddl($mem$$Address, $newval$$Register);
10833 %}
10834 ins_pipe(pipe_cmpxchg);
10835 %}
10836
10837 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10838 predicate(n->as_LoadStore()->result_not_used());
10839 match(Set dummy (GetAndAddL mem add));
10840 effect(KILL cr);
10841 format %{ "addq_lock $mem, $add" %}
10842 ins_encode %{
10843 __ lock();
10844 __ addq($mem$$Address, $add$$Register);
10845 %}
10846 ins_pipe(pipe_cmpxchg);
10847 %}
10848
10849 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10850 predicate(n->as_LoadStore()->result_not_used());
10851 match(Set dummy (GetAndAddL mem add));
10852 effect(KILL cr);
10853 format %{ "addq_lock $mem, $add" %}
10854 ins_encode %{
10855 __ lock();
10856 __ addq($mem$$Address, $add$$constant);
10857 %}
10858 ins_pipe(pipe_cmpxchg);
10859 %}
10860
10861 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10862 predicate(!n->as_LoadStore()->result_not_used());
10863 match(Set newval (GetAndAddL mem newval));
10864 effect(KILL cr);
10865 format %{ "xaddq_lock $mem, $newval" %}
10866 ins_encode %{
10867 __ lock();
10868 __ xaddq($mem$$Address, $newval$$Register);
10869 %}
10870 ins_pipe(pipe_cmpxchg);
10871 %}
10872
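// GetAndSet lowering: XCHG with a memory operand is implicitly locked, so no
// lock() prefix is emitted, and since XCHG does not modify the flags these
// patterns need no rFlagsReg effect.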
10873 instruct xchgB( memory mem, rRegI newval) %{
10874 match(Set newval (GetAndSetB mem newval));
10875 format %{ "XCHGB $newval,[$mem]" %}
10876 ins_encode %{
10877 __ xchgb($newval$$Register, $mem$$Address);
10878 %}
10879 ins_pipe( pipe_cmpxchg );
10880 %}
10881
10882 instruct xchgS( memory mem, rRegI newval) %{
10883 match(Set newval (GetAndSetS mem newval));
10884 format %{ "XCHGW $newval,[$mem]" %}
10885 ins_encode %{
10886 __ xchgw($newval$$Register, $mem$$Address);
10887 %}
10888 ins_pipe( pipe_cmpxchg );
10889 %}
10890
10891 instruct xchgI( memory mem, rRegI newval) %{
10892 match(Set newval (GetAndSetI mem newval));
10893 format %{ "XCHGL $newval,[$mem]" %}
10894 ins_encode %{
10895 __ xchgl($newval$$Register, $mem$$Address);
10896 %}
10897 ins_pipe( pipe_cmpxchg );
10898 %}
10899
10900 instruct xchgL( memory mem, rRegL newval) %{
10901 match(Set newval (GetAndSetL mem newval));
10902   format %{ "XCHGQ $newval,[$mem]" %}
10903 ins_encode %{
10904 __ xchgq($newval$$Register, $mem$$Address);
10905 %}
10906 ins_pipe( pipe_cmpxchg );
10907 %}
10908
10909 instruct xchgP( memory mem, rRegP newval) %{
10910 match(Set newval (GetAndSetP mem newval));
10911 predicate(n->as_LoadStore()->barrier_data() == 0);
10912 format %{ "XCHGQ $newval,[$mem]" %}
10913 ins_encode %{
10914 __ xchgq($newval$$Register, $mem$$Address);
10915 %}
10916 ins_pipe( pipe_cmpxchg );
10917 %}
10918
10919 instruct xchgN( memory mem, rRegN newval) %{
10920 predicate(n->as_LoadStore()->barrier_data() == 0);
10921 match(Set newval (GetAndSetN mem newval));
10922   format %{ "XCHGL $newval,[$mem]" %}
10923 ins_encode %{
10924 __ xchgl($newval$$Register, $mem$$Address);
10925 %}
10926 ins_pipe( pipe_cmpxchg );
10927 %}
10928
10929 //----------Abs Instructions-------------------------------------------
10930
10931 // Integer Absolute Instructions
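// Branchless abs: xorl zeroes dst, subl computes -src and sets the flags,
// and cmov(less) copies src back when that result is signed-negative, i.e.
// for positive src. Integer.MIN_VALUE overflows the negation and keeps
// itself, matching Java's Math.abs semantics. TEMP dst keeps dst and src in
// distinct registers.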
10932 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10933 %{
10934 match(Set dst (AbsI src));
10935 effect(TEMP dst, KILL cr);
10936 format %{ "xorl $dst, $dst\t# abs int\n\t"
10937 "subl $dst, $src\n\t"
10938 "cmovll $dst, $src" %}
10939 ins_encode %{
10940 __ xorl($dst$$Register, $dst$$Register);
10941 __ subl($dst$$Register, $src$$Register);
10942 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10943 %}
10944
10945 ins_pipe(ialu_reg_reg);
10946 %}
10947
10948 // Long Absolute Instructions
10949 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10950 %{
10951 match(Set dst (AbsL src));
10952 effect(TEMP dst, KILL cr);
10953 format %{ "xorl $dst, $dst\t# abs long\n\t"
10954 "subq $dst, $src\n\t"
10955 "cmovlq $dst, $src" %}
10956 ins_encode %{
10957 __ xorl($dst$$Register, $dst$$Register);
10958 __ subq($dst$$Register, $src$$Register);
10959 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10960 %}
10961
10962 ins_pipe(ialu_reg_reg);
10963 %}
10964
10965 //----------Subtraction Instructions-------------------------------------------
10966
10967 // Integer Subtraction Instructions
10968 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10969 %{
10970 predicate(!UseAPX);
10971 match(Set dst (SubI dst src));
10972 effect(KILL cr);
10973 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10974
10975 format %{ "subl $dst, $src\t# int" %}
10976 ins_encode %{
10977 __ subl($dst$$Register, $src$$Register);
10978 %}
10979 ins_pipe(ialu_reg_reg);
10980 %}
10981
10982 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10983 %{
10984 predicate(UseAPX);
10985 match(Set dst (SubI src1 src2));
10986 effect(KILL cr);
10987 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10988
10989 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
10990 ins_encode %{
10991 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10992 %}
10993 ins_pipe(ialu_reg_reg);
10994 %}
10995
10996 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10997 %{
10998 predicate(UseAPX);
10999 match(Set dst (SubI src1 src2));
11000 effect(KILL cr);
11001 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11002
11003 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11004 ins_encode %{
11005 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11006 %}
11007 ins_pipe(ialu_reg_reg);
11008 %}
11009
11010 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11011 %{
11012 predicate(UseAPX);
11013 match(Set dst (SubI (LoadI src1) src2));
11014 effect(KILL cr);
11015 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11016
11017 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11018 ins_encode %{
11019 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11020 %}
11021 ins_pipe(ialu_reg_reg);
11022 %}
11023
11024 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11025 %{
11026 predicate(!UseAPX);
11027 match(Set dst (SubI dst (LoadI src)));
11028 effect(KILL cr);
11029 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11030
11031 ins_cost(150);
11032 format %{ "subl $dst, $src\t# int" %}
11033 ins_encode %{
11034 __ subl($dst$$Register, $src$$Address);
11035 %}
11036 ins_pipe(ialu_reg_mem);
11037 %}
11038
11039 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11040 %{
11041 predicate(UseAPX);
11042 match(Set dst (SubI src1 (LoadI src2)));
11043 effect(KILL cr);
11044 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11045
11046 ins_cost(150);
11047 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11048 ins_encode %{
11049 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11050 %}
11051 ins_pipe(ialu_reg_mem);
11052 %}
11053
11054 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11055 %{
11056 predicate(UseAPX);
11057 match(Set dst (SubI (LoadI src1) src2));
11058 effect(KILL cr);
11059 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11060
11061 ins_cost(150);
11062 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11063 ins_encode %{
11064 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11065 %}
11066 ins_pipe(ialu_reg_mem);
11067 %}
11068
11069 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11070 %{
11071 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11072 effect(KILL cr);
11073 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11074
11075 ins_cost(150);
11076 format %{ "subl $dst, $src\t# int" %}
11077 ins_encode %{
11078 __ subl($dst$$Address, $src$$Register);
11079 %}
11080 ins_pipe(ialu_mem_reg);
11081 %}
11082
11083 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11084 %{
11085 predicate(!UseAPX);
11086 match(Set dst (SubL dst src));
11087 effect(KILL cr);
11088 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11089
11090 format %{ "subq $dst, $src\t# long" %}
11091 ins_encode %{
11092 __ subq($dst$$Register, $src$$Register);
11093 %}
11094 ins_pipe(ialu_reg_reg);
11095 %}
11096
11097 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11098 %{
11099 predicate(UseAPX);
11100 match(Set dst (SubL src1 src2));
11101 effect(KILL cr);
11102 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11103
11104 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11105 ins_encode %{
11106 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11107 %}
11108 ins_pipe(ialu_reg_reg);
11109 %}
11110
11111 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11112 %{
11113 predicate(UseAPX);
11114 match(Set dst (SubL src1 src2));
11115 effect(KILL cr);
11116 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11117
11118 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11119 ins_encode %{
11120 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11121 %}
11122 ins_pipe(ialu_reg_reg);
11123 %}
11124
11125 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11126 %{
11127 predicate(UseAPX);
11128 match(Set dst (SubL (LoadL src1) src2));
11129 effect(KILL cr);
11130 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11131
11132 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11133 ins_encode %{
11134 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11135 %}
11136 ins_pipe(ialu_reg_reg);
11137 %}
11138
11139 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11140 %{
11141 predicate(!UseAPX);
11142 match(Set dst (SubL dst (LoadL src)));
11143 effect(KILL cr);
11144 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11145
11146 ins_cost(150);
11147 format %{ "subq $dst, $src\t# long" %}
11148 ins_encode %{
11149 __ subq($dst$$Register, $src$$Address);
11150 %}
11151 ins_pipe(ialu_reg_mem);
11152 %}
11153
11154 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11155 %{
11156 predicate(UseAPX);
11157 match(Set dst (SubL src1 (LoadL src2)));
11158 effect(KILL cr);
11159 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11160
11161 ins_cost(150);
11162 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11163 ins_encode %{
11164 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11165 %}
11166 ins_pipe(ialu_reg_mem);
11167 %}
11168
11169 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11170 %{
11171 predicate(UseAPX);
11172 match(Set dst (SubL (LoadL src1) src2));
11173 effect(KILL cr);
11174 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175
11176 ins_cost(150);
11177 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11178 ins_encode %{
11179 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11180 %}
11181 ins_pipe(ialu_reg_mem);
11182 %}
11183
11184 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11185 %{
11186 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11187 effect(KILL cr);
11188 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11189
11190 ins_cost(150);
11191 format %{ "subq $dst, $src\t# long" %}
11192 ins_encode %{
11193 __ subq($dst$$Address, $src$$Register);
11194 %}
11195 ins_pipe(ialu_mem_reg);
11196 %}
11197
11198 // Subtract from a pointer
11199 // XXX hmpf???
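// C2 canonicalizes pointer-minus-int to AddP dst (SubI 0 src); rather than
// materializing the negated int, this pattern folds the whole expression
// into a single 64-bit subq.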
11200 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11201 %{
11202 match(Set dst (AddP dst (SubI zero src)));
11203 effect(KILL cr);
11204
11205 format %{ "subq $dst, $src\t# ptr - int" %}
11206 ins_encode %{
11207 __ subq($dst$$Register, $src$$Register);
11208 %}
11209 ins_pipe(ialu_reg_reg);
11210 %}
11211
11212 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11213 %{
11214 predicate(!UseAPX);
11215 match(Set dst (SubI zero dst));
11216 effect(KILL cr);
11217 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11218
11219 format %{ "negl $dst\t# int" %}
11220 ins_encode %{
11221 __ negl($dst$$Register);
11222 %}
11223 ins_pipe(ialu_reg);
11224 %}
11225
11226 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11227 %{
11228 predicate(UseAPX);
11229 match(Set dst (SubI zero src));
11230 effect(KILL cr);
11231 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11232
11233 format %{ "enegl $dst, $src\t# int ndd" %}
11234 ins_encode %{
11235 __ enegl($dst$$Register, $src$$Register, false);
11236 %}
11237 ins_pipe(ialu_reg);
11238 %}
11239
11240 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11241 %{
11242 predicate(!UseAPX);
11243 match(Set dst (NegI dst));
11244 effect(KILL cr);
11245 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11246
11247 format %{ "negl $dst\t# int" %}
11248 ins_encode %{
11249 __ negl($dst$$Register);
11250 %}
11251 ins_pipe(ialu_reg);
11252 %}
11253
11254 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11255 %{
11256 predicate(UseAPX);
11257 match(Set dst (NegI src));
11258 effect(KILL cr);
11259 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11260
11261 format %{ "enegl $dst, $src\t# int ndd" %}
11262 ins_encode %{
11263 __ enegl($dst$$Register, $src$$Register, false);
11264 %}
11265 ins_pipe(ialu_reg);
11266 %}
11267
11268 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11269 %{
11270 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11271 effect(KILL cr);
11272 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11273
11274 format %{ "negl $dst\t# int" %}
11275 ins_encode %{
11276 __ negl($dst$$Address);
11277 %}
11278 ins_pipe(ialu_reg);
11279 %}
11280
11281 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11282 %{
11283 predicate(!UseAPX);
11284 match(Set dst (SubL zero dst));
11285 effect(KILL cr);
11286 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11287
11288 format %{ "negq $dst\t# long" %}
11289 ins_encode %{
11290 __ negq($dst$$Register);
11291 %}
11292 ins_pipe(ialu_reg);
11293 %}
11294
11295 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11296 %{
11297 predicate(UseAPX);
11298 match(Set dst (SubL zero src));
11299 effect(KILL cr);
11300 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11301
11302 format %{ "enegq $dst, $src\t# long ndd" %}
11303 ins_encode %{
11304 __ enegq($dst$$Register, $src$$Register, false);
11305 %}
11306 ins_pipe(ialu_reg);
11307 %}
11308
11309 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11310 %{
11311 predicate(!UseAPX);
11312 match(Set dst (NegL dst));
11313 effect(KILL cr);
11314 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11315
11316   format %{ "negq $dst\t# long" %}
11317 ins_encode %{
11318 __ negq($dst$$Register);
11319 %}
11320 ins_pipe(ialu_reg);
11321 %}
11322
11323 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11324 %{
11325 predicate(UseAPX);
11326 match(Set dst (NegL src));
11327 effect(KILL cr);
11328 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11329
11330 format %{ "enegq $dst, $src\t# long ndd" %}
11331 ins_encode %{
11332 __ enegq($dst$$Register, $src$$Register, false);
11333 %}
11334 ins_pipe(ialu_reg);
11335 %}
11336
11337 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11338 %{
11339 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11340 effect(KILL cr);
11341 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11342
11343 format %{ "negq $dst\t# long" %}
11344 ins_encode %{
11345 __ negq($dst$$Address);
11346 %}
11347 ins_pipe(ialu_reg);
11348 %}
11349
11350 //----------Multiplication/Division Instructions-------------------------------
11351 // Integer Multiplication Instructions
11352 // Multiply Register
11353
11354 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11355 %{
11356 predicate(!UseAPX);
11357 match(Set dst (MulI dst src));
11358 effect(KILL cr);
11359
11360 ins_cost(300);
11361 format %{ "imull $dst, $src\t# int" %}
11362 ins_encode %{
11363 __ imull($dst$$Register, $src$$Register);
11364 %}
11365 ins_pipe(ialu_reg_reg_alu0);
11366 %}
11367
11368 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11369 %{
11370 predicate(UseAPX);
11371 match(Set dst (MulI src1 src2));
11372 effect(KILL cr);
11373
11374 ins_cost(300);
11375 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11376 ins_encode %{
11377 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11378 %}
11379 ins_pipe(ialu_reg_reg_alu0);
11380 %}
11381
11382 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11383 %{
11384 match(Set dst (MulI src imm));
11385 effect(KILL cr);
11386
11387 ins_cost(300);
11388 format %{ "imull $dst, $src, $imm\t# int" %}
11389 ins_encode %{
11390 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11391 %}
11392 ins_pipe(ialu_reg_reg_alu0);
11393 %}
11394
11395 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11396 %{
11397 predicate(!UseAPX);
11398 match(Set dst (MulI dst (LoadI src)));
11399 effect(KILL cr);
11400
11401 ins_cost(350);
11402 format %{ "imull $dst, $src\t# int" %}
11403 ins_encode %{
11404 __ imull($dst$$Register, $src$$Address);
11405 %}
11406 ins_pipe(ialu_reg_mem_alu0);
11407 %}
11408
11409 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11410 %{
11411 predicate(UseAPX);
11412 match(Set dst (MulI src1 (LoadI src2)));
11413 effect(KILL cr);
11414
11415 ins_cost(350);
11416 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11417 ins_encode %{
11418 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11419 %}
11420 ins_pipe(ialu_reg_mem_alu0);
11421 %}
11422
11423 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11424 %{
11425 match(Set dst (MulI (LoadI src) imm));
11426 effect(KILL cr);
11427
11428 ins_cost(300);
11429 format %{ "imull $dst, $src, $imm\t# int" %}
11430 ins_encode %{
11431 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11432 %}
11433 ins_pipe(ialu_reg_mem_alu0);
11434 %}
11435
11436 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11437 %{
11438 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11439 effect(KILL cr, KILL src2);
11440
11441 expand %{ mulI_rReg(dst, src1, cr);
11442 mulI_rReg(src2, src3, cr);
11443 addI_rReg(dst, src2, cr); %}
11444 %}
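
// mulAddS2I has no dedicated instruction: the ADL expand clause above
// rewrites the node at match time into two imull instructions followed by an
// addl that sums the partial products.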
11445
11446 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11447 %{
11448 predicate(!UseAPX);
11449 match(Set dst (MulL dst src));
11450 effect(KILL cr);
11451
11452 ins_cost(300);
11453 format %{ "imulq $dst, $src\t# long" %}
11454 ins_encode %{
11455 __ imulq($dst$$Register, $src$$Register);
11456 %}
11457 ins_pipe(ialu_reg_reg_alu0);
11458 %}
11459
11460 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11461 %{
11462 predicate(UseAPX);
11463 match(Set dst (MulL src1 src2));
11464 effect(KILL cr);
11465
11466 ins_cost(300);
11467 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11468 ins_encode %{
11469 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11470 %}
11471 ins_pipe(ialu_reg_reg_alu0);
11472 %}
11473
11474 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11475 %{
11476 match(Set dst (MulL src imm));
11477 effect(KILL cr);
11478
11479 ins_cost(300);
11480 format %{ "imulq $dst, $src, $imm\t# long" %}
11481 ins_encode %{
11482 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11483 %}
11484 ins_pipe(ialu_reg_reg_alu0);
11485 %}
11486
11487 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11488 %{
11489 predicate(!UseAPX);
11490 match(Set dst (MulL dst (LoadL src)));
11491 effect(KILL cr);
11492
11493 ins_cost(350);
11494 format %{ "imulq $dst, $src\t# long" %}
11495 ins_encode %{
11496 __ imulq($dst$$Register, $src$$Address);
11497 %}
11498 ins_pipe(ialu_reg_mem_alu0);
11499 %}
11500
11501 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11502 %{
11503 predicate(UseAPX);
11504 match(Set dst (MulL src1 (LoadL src2)));
11505 effect(KILL cr);
11506
11507 ins_cost(350);
11508   format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11509 ins_encode %{
11510 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11511 %}
11512 ins_pipe(ialu_reg_mem_alu0);
11513 %}
11514
11515 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11516 %{
11517 match(Set dst (MulL (LoadL src) imm));
11518 effect(KILL cr);
11519
11520 ins_cost(300);
11521 format %{ "imulq $dst, $src, $imm\t# long" %}
11522 ins_encode %{
11523 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11524 %}
11525 ins_pipe(ialu_reg_mem_alu0);
11526 %}
11527
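// High-half multiplies use the one-operand imulq/mulq forms, which compute
// the full 128-bit product RDX:RAX = RAX * src; the fixed rax/rdx operand
// classes pin the register allocator to that pair.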
11528 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11529 %{
11530 match(Set dst (MulHiL src rax));
11531 effect(USE_KILL rax, KILL cr);
11532
11533 ins_cost(300);
11534 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11535 ins_encode %{
11536 __ imulq($src$$Register);
11537 %}
11538 ins_pipe(ialu_reg_reg_alu0);
11539 %}
11540
11541 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11542 %{
11543 match(Set dst (UMulHiL src rax));
11544 effect(USE_KILL rax, KILL cr);
11545
11546 ins_cost(300);
11547 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11548 ins_encode %{
11549 __ mulq($src$$Register);
11550 %}
11551 ins_pipe(ialu_reg_reg_alu0);
11552 %}
11553
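// Signed division and remainder must special-case min_int / -1 (min_long
// / -1 for the long forms): hardware IDIV raises #DE on that overflow,
// while Java requires the quotient min_int and remainder 0, so the
// cdql_enc/cdqq_enc encodings emit an explicit compare-and-branch around
// the divide, as spelled out in the format strings below.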
11554 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11555 rFlagsReg cr)
11556 %{
11557 match(Set rax (DivI rax div));
11558 effect(KILL rdx, KILL cr);
11559
11560 ins_cost(30*100+10*100); // XXX
11561 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11562 "jne,s normal\n\t"
11563 "xorl rdx, rdx\n\t"
11564 "cmpl $div, -1\n\t"
11565 "je,s done\n"
11566 "normal: cdql\n\t"
11567 "idivl $div\n"
11568 "done:" %}
11569 ins_encode(cdql_enc(div));
11570 ins_pipe(ialu_reg_reg_alu0);
11571 %}
11572
11573 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11574 rFlagsReg cr)
11575 %{
11576 match(Set rax (DivL rax div));
11577 effect(KILL rdx, KILL cr);
11578
11579 ins_cost(30*100+10*100); // XXX
11580 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11581 "cmpq rax, rdx\n\t"
11582 "jne,s normal\n\t"
11583 "xorl rdx, rdx\n\t"
11584 "cmpq $div, -1\n\t"
11585 "je,s done\n"
11586 "normal: cdqq\n\t"
11587 "idivq $div\n"
11588 "done:" %}
11589 ins_encode(cdqq_enc(div));
11590 ins_pipe(ialu_reg_reg_alu0);
11591 %}
11592
11593 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11594 %{
11595 match(Set rax (UDivI rax div));
11596 effect(KILL rdx, KILL cr);
11597
11598 ins_cost(300);
11599 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11600 ins_encode %{
11601 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11602 %}
11603 ins_pipe(ialu_reg_reg_alu0);
11604 %}
11605
11606 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11607 %{
11608 match(Set rax (UDivL rax div));
11609 effect(KILL rdx, KILL cr);
11610
11611 ins_cost(300);
11612 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11613 ins_encode %{
11614 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11615 %}
11616 ins_pipe(ialu_reg_reg_alu0);
11617 %}
11618
11619 // Integer DIVMOD with Register, both quotient and mod results
11620 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11621 rFlagsReg cr)
11622 %{
11623 match(DivModI rax div);
11624 effect(KILL cr);
11625
11626 ins_cost(30*100+10*100); // XXX
11627 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11628 "jne,s normal\n\t"
11629 "xorl rdx, rdx\n\t"
11630 "cmpl $div, -1\n\t"
11631 "je,s done\n"
11632 "normal: cdql\n\t"
11633 "idivl $div\n"
11634 "done:" %}
11635 ins_encode(cdql_enc(div));
11636 ins_pipe(pipe_slow);
11637 %}
11638
11639 // Long DIVMOD with Register, both quotient and mod results
11640 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11641 rFlagsReg cr)
11642 %{
11643 match(DivModL rax div);
11644 effect(KILL cr);
11645
11646 ins_cost(30*100+10*100); // XXX
11647 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11648 "cmpq rax, rdx\n\t"
11649 "jne,s normal\n\t"
11650 "xorl rdx, rdx\n\t"
11651 "cmpq $div, -1\n\t"
11652 "je,s done\n"
11653 "normal: cdqq\n\t"
11654 "idivq $div\n"
11655 "done:" %}
11656 ins_encode(cdqq_enc(div));
11657 ins_pipe(pipe_slow);
11658 %}
11659
11660 // Unsigned integer DIVMOD with Register, both quotient and mod results
11661 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11662 no_rax_rdx_RegI div, rFlagsReg cr)
11663 %{
11664 match(UDivModI rax div);
11665 effect(TEMP tmp, KILL cr);
11666
11667 ins_cost(300);
11668 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11669 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11670 %}
11671 ins_encode %{
11672 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11673 %}
11674 ins_pipe(pipe_slow);
11675 %}
11676
11677 // Unsigned long DIVMOD with Register, both quotient and mod results
11678 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11679 no_rax_rdx_RegL div, rFlagsReg cr)
11680 %{
11681 match(UDivModL rax div);
11682 effect(TEMP tmp, KILL cr);
11683
11684 ins_cost(300);
11685 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11686 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11687 %}
11688 ins_encode %{
11689 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11690 %}
11691 ins_pipe(pipe_slow);
11692 %}
11693
11694 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11695 rFlagsReg cr)
11696 %{
11697 match(Set rdx (ModI rax div));
11698 effect(KILL rax, KILL cr);
11699
11700 ins_cost(300); // XXX
11701 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11702 "jne,s normal\n\t"
11703 "xorl rdx, rdx\n\t"
11704 "cmpl $div, -1\n\t"
11705 "je,s done\n"
11706 "normal: cdql\n\t"
11707 "idivl $div\n"
11708 "done:" %}
11709 ins_encode(cdql_enc(div));
11710 ins_pipe(ialu_reg_reg_alu0);
11711 %}
11712
11713 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11714 rFlagsReg cr)
11715 %{
11716 match(Set rdx (ModL rax div));
11717 effect(KILL rax, KILL cr);
11718
11719 ins_cost(300); // XXX
11720 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11721 "cmpq rax, rdx\n\t"
11722 "jne,s normal\n\t"
11723 "xorl rdx, rdx\n\t"
11724 "cmpq $div, -1\n\t"
11725 "je,s done\n"
11726 "normal: cdqq\n\t"
11727 "idivq $div\n"
11728 "done:" %}
11729 ins_encode(cdqq_enc(div));
11730 ins_pipe(ialu_reg_reg_alu0);
11731 %}
11732
11733 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11734 %{
11735 match(Set rdx (UModI rax div));
11736 effect(KILL rax, KILL cr);
11737
11738 ins_cost(300);
11739 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11740 ins_encode %{
11741 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11742 %}
11743 ins_pipe(ialu_reg_reg_alu0);
11744 %}
11745
11746 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11747 %{
11748 match(Set rdx (UModL rax div));
11749 effect(KILL rax, KILL cr);
11750
11751 ins_cost(300);
11752 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11753 ins_encode %{
11754 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11755 %}
11756 ins_pipe(ialu_reg_reg_alu0);
11757 %}
11758
11759 // Integer Shift Instructions
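// Legacy shifts take a variable count only in CL (the rcx_RegI operands)
// and clobber the flags. On BMI2 hardware the shlx/shrx/sarx forms accept
// the count in any register and leave the flags untouched, hence those
// patterns carry no KILL cr effect.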
11760 // Shift Left by one, two, three
11761 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11762 %{
11763 predicate(!UseAPX);
11764 match(Set dst (LShiftI dst shift));
11765 effect(KILL cr);
11766
11767 format %{ "sall $dst, $shift" %}
11768 ins_encode %{
11769 __ sall($dst$$Register, $shift$$constant);
11770 %}
11771 ins_pipe(ialu_reg);
11772 %}
11773
11774 // Shift Left by one, two, three
11775 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11776 %{
11777 predicate(UseAPX);
11778 match(Set dst (LShiftI src shift));
11779 effect(KILL cr);
11780
11781   format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11782 ins_encode %{
11783 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11784 %}
11785 ins_pipe(ialu_reg);
11786 %}
11787
11788 // Shift Left by 8-bit immediate
11789 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11790 %{
11791 predicate(!UseAPX);
11792 match(Set dst (LShiftI dst shift));
11793 effect(KILL cr);
11794
11795 format %{ "sall $dst, $shift" %}
11796 ins_encode %{
11797 __ sall($dst$$Register, $shift$$constant);
11798 %}
11799 ins_pipe(ialu_reg);
11800 %}
11801
11802 // Shift Left by 8-bit immediate
11803 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11804 %{
11805 predicate(UseAPX);
11806 match(Set dst (LShiftI src shift));
11807 effect(KILL cr);
11808
11809 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11810 ins_encode %{
11811 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11812 %}
11813 ins_pipe(ialu_reg);
11814 %}
11815
11816 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11817 %{
11818 predicate(UseAPX);
11819 match(Set dst (LShiftI (LoadI src) shift));
11820 effect(KILL cr);
11821
11822 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11823 ins_encode %{
11824 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11825 %}
11826 ins_pipe(ialu_reg);
11827 %}
11828
11829 // Shift Left by 8-bit immediate
11830 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11831 %{
11832 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11833 effect(KILL cr);
11834
11835 format %{ "sall $dst, $shift" %}
11836 ins_encode %{
11837 __ sall($dst$$Address, $shift$$constant);
11838 %}
11839 ins_pipe(ialu_mem_imm);
11840 %}
11841
11842 // Shift Left by variable
11843 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11844 %{
11845 predicate(!VM_Version::supports_bmi2());
11846 match(Set dst (LShiftI dst shift));
11847 effect(KILL cr);
11848
11849 format %{ "sall $dst, $shift" %}
11850 ins_encode %{
11851 __ sall($dst$$Register);
11852 %}
11853 ins_pipe(ialu_reg_reg);
11854 %}
11855
11856 // Shift Left by variable
11857 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11858 %{
11859 predicate(!VM_Version::supports_bmi2());
11860 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11861 effect(KILL cr);
11862
11863 format %{ "sall $dst, $shift" %}
11864 ins_encode %{
11865 __ sall($dst$$Address);
11866 %}
11867 ins_pipe(ialu_mem_reg);
11868 %}
11869
11870 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11871 %{
11872 predicate(VM_Version::supports_bmi2());
11873 match(Set dst (LShiftI src shift));
11874
11875 format %{ "shlxl $dst, $src, $shift" %}
11876 ins_encode %{
11877 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11878 %}
11879 ins_pipe(ialu_reg_reg);
11880 %}
11881
11882 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11883 %{
11884 predicate(VM_Version::supports_bmi2());
11885 match(Set dst (LShiftI (LoadI src) shift));
11886 ins_cost(175);
11887 format %{ "shlxl $dst, $src, $shift" %}
11888 ins_encode %{
11889 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11890 %}
11891 ins_pipe(ialu_reg_mem);
11892 %}
11893
11894 // Arithmetic Shift Right by 8-bit immediate
11895 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11896 %{
11897 predicate(!UseAPX);
11898 match(Set dst (RShiftI dst shift));
11899 effect(KILL cr);
11900
11901 format %{ "sarl $dst, $shift" %}
11902 ins_encode %{
11903 __ sarl($dst$$Register, $shift$$constant);
11904 %}
11905 ins_pipe(ialu_mem_imm);
11906 %}
11907
11908 // Arithmetic Shift Right by 8-bit immediate
11909 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11910 %{
11911 predicate(UseAPX);
11912 match(Set dst (RShiftI src shift));
11913 effect(KILL cr);
11914
11915 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11916 ins_encode %{
11917 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11918 %}
11919 ins_pipe(ialu_mem_imm);
11920 %}
11921
11922 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11923 %{
11924 predicate(UseAPX);
11925 match(Set dst (RShiftI (LoadI src) shift));
11926 effect(KILL cr);
11927
11928 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11929 ins_encode %{
11930 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11931 %}
11932 ins_pipe(ialu_mem_imm);
11933 %}
11934
11935 // Arithmetic Shift Right by 8-bit immediate
11936 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11937 %{
11938 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11939 effect(KILL cr);
11940
11941 format %{ "sarl $dst, $shift" %}
11942 ins_encode %{
11943 __ sarl($dst$$Address, $shift$$constant);
11944 %}
11945 ins_pipe(ialu_mem_imm);
11946 %}
11947
11948 // Arithmetic Shift Right by variable
11949 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11950 %{
11951 predicate(!VM_Version::supports_bmi2());
11952 match(Set dst (RShiftI dst shift));
11953 effect(KILL cr);
11954
11955 format %{ "sarl $dst, $shift" %}
11956 ins_encode %{
11957 __ sarl($dst$$Register);
11958 %}
11959 ins_pipe(ialu_reg_reg);
11960 %}
11961
11962 // Arithmetic Shift Right by variable
11963 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11964 %{
11965 predicate(!VM_Version::supports_bmi2());
11966 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11967 effect(KILL cr);
11968
11969 format %{ "sarl $dst, $shift" %}
11970 ins_encode %{
11971 __ sarl($dst$$Address);
11972 %}
11973 ins_pipe(ialu_mem_reg);
11974 %}
11975
11976 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11977 %{
11978 predicate(VM_Version::supports_bmi2());
11979 match(Set dst (RShiftI src shift));
11980
11981 format %{ "sarxl $dst, $src, $shift" %}
11982 ins_encode %{
11983 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11984 %}
11985 ins_pipe(ialu_reg_reg);
11986 %}
11987
11988 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11989 %{
11990 predicate(VM_Version::supports_bmi2());
11991 match(Set dst (RShiftI (LoadI src) shift));
11992 ins_cost(175);
11993 format %{ "sarxl $dst, $src, $shift" %}
11994 ins_encode %{
11995 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11996 %}
11997 ins_pipe(ialu_reg_mem);
11998 %}
11999
12000 // Logical Shift Right by 8-bit immediate
12001 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12002 %{
12003 predicate(!UseAPX);
12004 match(Set dst (URShiftI dst shift));
12005 effect(KILL cr);
12006
12007 format %{ "shrl $dst, $shift" %}
12008 ins_encode %{
12009 __ shrl($dst$$Register, $shift$$constant);
12010 %}
12011 ins_pipe(ialu_reg);
12012 %}
12013
12014 // Logical Shift Right by 8-bit immediate
12015 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12016 %{
12017 predicate(UseAPX);
12018 match(Set dst (URShiftI src shift));
12019 effect(KILL cr);
12020
12021   format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12022 ins_encode %{
12023 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12024 %}
12025 ins_pipe(ialu_reg);
12026 %}
12027
12028 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12029 %{
12030 predicate(UseAPX);
12031 match(Set dst (URShiftI (LoadI src) shift));
12032 effect(KILL cr);
12033
12034   format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12035 ins_encode %{
12036 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12037 %}
12038 ins_pipe(ialu_reg);
12039 %}
12040
12041 // Logical Shift Right by 8-bit immediate
12042 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12043 %{
12044 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12045 effect(KILL cr);
12046
12047 format %{ "shrl $dst, $shift" %}
12048 ins_encode %{
12049 __ shrl($dst$$Address, $shift$$constant);
12050 %}
12051 ins_pipe(ialu_mem_imm);
12052 %}
12053
12054 // Logical Shift Right by variable
12055 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12056 %{
12057 predicate(!VM_Version::supports_bmi2());
12058 match(Set dst (URShiftI dst shift));
12059 effect(KILL cr);
12060
12061 format %{ "shrl $dst, $shift" %}
12062 ins_encode %{
12063 __ shrl($dst$$Register);
12064 %}
12065 ins_pipe(ialu_reg_reg);
12066 %}
12067
12068 // Logical Shift Right by variable
12069 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12070 %{
12071 predicate(!VM_Version::supports_bmi2());
12072 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12073 effect(KILL cr);
12074
12075 format %{ "shrl $dst, $shift" %}
12076 ins_encode %{
12077 __ shrl($dst$$Address);
12078 %}
12079 ins_pipe(ialu_mem_reg);
12080 %}
12081
12082 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12083 %{
12084 predicate(VM_Version::supports_bmi2());
12085 match(Set dst (URShiftI src shift));
12086
12087 format %{ "shrxl $dst, $src, $shift" %}
12088 ins_encode %{
12089 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12090 %}
12091 ins_pipe(ialu_reg_reg);
12092 %}
12093
12094 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12095 %{
12096 predicate(VM_Version::supports_bmi2());
12097 match(Set dst (URShiftI (LoadI src) shift));
12098 ins_cost(175);
12099 format %{ "shrxl $dst, $src, $shift" %}
12100 ins_encode %{
12101 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12102 %}
12103 ins_pipe(ialu_reg_mem);
12104 %}
12105
12106 // Long Shift Instructions
12107 // Shift Left by one, two, three
12108 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12109 %{
12110 predicate(!UseAPX);
12111 match(Set dst (LShiftL dst shift));
12112 effect(KILL cr);
12113
12114 format %{ "salq $dst, $shift" %}
12115 ins_encode %{
12116 __ salq($dst$$Register, $shift$$constant);
12117 %}
12118 ins_pipe(ialu_reg);
12119 %}
12120
12121 // Shift Left by one, two, three
12122 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12123 %{
12124 predicate(UseAPX);
12125 match(Set dst (LShiftL src shift));
12126 effect(KILL cr);
12127
12128 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12129 ins_encode %{
12130 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12131 %}
12132 ins_pipe(ialu_reg);
12133 %}
12134
12135 // Shift Left by 8-bit immediate
12136 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12137 %{
12138 predicate(!UseAPX);
12139 match(Set dst (LShiftL dst shift));
12140 effect(KILL cr);
12141
12142 format %{ "salq $dst, $shift" %}
12143 ins_encode %{
12144 __ salq($dst$$Register, $shift$$constant);
12145 %}
12146 ins_pipe(ialu_reg);
12147 %}
12148
12149 // Shift Left by 8-bit immediate
12150 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12151 %{
12152 predicate(UseAPX);
12153 match(Set dst (LShiftL src shift));
12154 effect(KILL cr);
12155
12156 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12157 ins_encode %{
12158 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12159 %}
12160 ins_pipe(ialu_reg);
12161 %}
12162
12163 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12164 %{
12165 predicate(UseAPX);
12166 match(Set dst (LShiftL (LoadL src) shift));
12167 effect(KILL cr);
12168
12169 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12170 ins_encode %{
12171 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12172 %}
12173 ins_pipe(ialu_reg);
12174 %}
12175
12176 // Shift Left by 8-bit immediate
12177 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12178 %{
12179 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12180 effect(KILL cr);
12181
12182 format %{ "salq $dst, $shift" %}
12183 ins_encode %{
12184 __ salq($dst$$Address, $shift$$constant);
12185 %}
12186 ins_pipe(ialu_mem_imm);
12187 %}
12188
12189 // Shift Left by variable
12190 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12191 %{
12192 predicate(!VM_Version::supports_bmi2());
12193 match(Set dst (LShiftL dst shift));
12194 effect(KILL cr);
12195
12196 format %{ "salq $dst, $shift" %}
12197 ins_encode %{
12198 __ salq($dst$$Register);
12199 %}
12200 ins_pipe(ialu_reg_reg);
12201 %}
12202
12203 // Shift Left by variable
12204 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12205 %{
12206 predicate(!VM_Version::supports_bmi2());
12207 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12208 effect(KILL cr);
12209
12210 format %{ "salq $dst, $shift" %}
12211 ins_encode %{
12212 __ salq($dst$$Address);
12213 %}
12214 ins_pipe(ialu_mem_reg);
12215 %}
12216
12217 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12218 %{
12219 predicate(VM_Version::supports_bmi2());
12220 match(Set dst (LShiftL src shift));
12221
12222 format %{ "shlxq $dst, $src, $shift" %}
12223 ins_encode %{
12224 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12225 %}
12226 ins_pipe(ialu_reg_reg);
12227 %}
12228
12229 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12230 %{
12231 predicate(VM_Version::supports_bmi2());
12232 match(Set dst (LShiftL (LoadL src) shift));
12233 ins_cost(175);
12234 format %{ "shlxq $dst, $src, $shift" %}
12235 ins_encode %{
12236 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12237 %}
12238 ins_pipe(ialu_reg_mem);
12239 %}
12240
12241 // Arithmetic Shift Right by 8-bit immediate
12242 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12243 %{
12244 predicate(!UseAPX);
12245 match(Set dst (RShiftL dst shift));
12246 effect(KILL cr);
12247
12248 format %{ "sarq $dst, $shift" %}
12249 ins_encode %{
12250 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12251 %}
ins_pipe(ialu_reg);
12253 %}
12254
12255 // Arithmetic Shift Right by 8-bit immediate
12256 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12257 %{
12258 predicate(UseAPX);
12259 match(Set dst (RShiftL src shift));
12260 effect(KILL cr);
12261
12262 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12263 ins_encode %{
12264 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12265 %}
ins_pipe(ialu_reg);
12267 %}
12268
12269 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12270 %{
12271 predicate(UseAPX);
12272 match(Set dst (RShiftL (LoadL src) shift));
12273 effect(KILL cr);
12274
12275 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12276 ins_encode %{
12277 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12278 %}
ins_pipe(ialu_reg);
12280 %}
12281
12282 // Arithmetic Shift Right by 8-bit immediate
12283 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12284 %{
12285 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12286 effect(KILL cr);
12287
12288 format %{ "sarq $dst, $shift" %}
12289 ins_encode %{
12290 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12291 %}
12292 ins_pipe(ialu_mem_imm);
12293 %}
12294
12295 // Arithmetic Shift Right by variable
12296 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12297 %{
12298 predicate(!VM_Version::supports_bmi2());
12299 match(Set dst (RShiftL dst shift));
12300 effect(KILL cr);
12301
12302 format %{ "sarq $dst, $shift" %}
12303 ins_encode %{
12304 __ sarq($dst$$Register);
12305 %}
12306 ins_pipe(ialu_reg_reg);
12307 %}
12308
12309 // Arithmetic Shift Right by variable
12310 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12311 %{
12312 predicate(!VM_Version::supports_bmi2());
12313 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12314 effect(KILL cr);
12315
12316 format %{ "sarq $dst, $shift" %}
12317 ins_encode %{
12318 __ sarq($dst$$Address);
12319 %}
12320 ins_pipe(ialu_mem_reg);
12321 %}
12322
12323 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12324 %{
12325 predicate(VM_Version::supports_bmi2());
12326 match(Set dst (RShiftL src shift));
12327
12328 format %{ "sarxq $dst, $src, $shift" %}
12329 ins_encode %{
12330 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12331 %}
12332 ins_pipe(ialu_reg_reg);
12333 %}
12334
12335 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12336 %{
12337 predicate(VM_Version::supports_bmi2());
12338 match(Set dst (RShiftL (LoadL src) shift));
12339 ins_cost(175);
12340 format %{ "sarxq $dst, $src, $shift" %}
12341 ins_encode %{
12342 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12343 %}
12344 ins_pipe(ialu_reg_mem);
12345 %}
12346
12347 // Logical Shift Right by 8-bit immediate
12348 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12349 %{
12350 predicate(!UseAPX);
12351 match(Set dst (URShiftL dst shift));
12352 effect(KILL cr);
12353
12354 format %{ "shrq $dst, $shift" %}
12355 ins_encode %{
12356 __ shrq($dst$$Register, $shift$$constant);
12357 %}
12358 ins_pipe(ialu_reg);
12359 %}
12360
12361 // Logical Shift Right by 8-bit immediate
12362 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12363 %{
12364 predicate(UseAPX);
12365 match(Set dst (URShiftL src shift));
12366 effect(KILL cr);
12367
12368 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12369 ins_encode %{
12370 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12371 %}
12372 ins_pipe(ialu_reg);
12373 %}
12374
12375 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12376 %{
12377 predicate(UseAPX);
12378 match(Set dst (URShiftL (LoadL src) shift));
12379 effect(KILL cr);
12380
12381 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12382 ins_encode %{
12383 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12384 %}
12385 ins_pipe(ialu_reg);
12386 %}
12387
12388 // Logical Shift Right by 8-bit immediate
12389 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12390 %{
12391 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12392 effect(KILL cr);
12393
12394 format %{ "shrq $dst, $shift" %}
12395 ins_encode %{
12396 __ shrq($dst$$Address, $shift$$constant);
12397 %}
12398 ins_pipe(ialu_mem_imm);
12399 %}
12400
12401 // Logical Shift Right by variable
12402 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12403 %{
12404 predicate(!VM_Version::supports_bmi2());
12405 match(Set dst (URShiftL dst shift));
12406 effect(KILL cr);
12407
12408 format %{ "shrq $dst, $shift" %}
12409 ins_encode %{
12410 __ shrq($dst$$Register);
12411 %}
12412 ins_pipe(ialu_reg_reg);
12413 %}
12414
12415 // Logical Shift Right by variable
12416 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12417 %{
12418 predicate(!VM_Version::supports_bmi2());
12419 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12420 effect(KILL cr);
12421
12422 format %{ "shrq $dst, $shift" %}
12423 ins_encode %{
12424 __ shrq($dst$$Address);
12425 %}
12426 ins_pipe(ialu_mem_reg);
12427 %}
12428
12429 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12430 %{
12431 predicate(VM_Version::supports_bmi2());
12432 match(Set dst (URShiftL src shift));
12433
12434 format %{ "shrxq $dst, $src, $shift" %}
12435 ins_encode %{
12436 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12437 %}
12438 ins_pipe(ialu_reg_reg);
12439 %}
12440
12441 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12442 %{
12443 predicate(VM_Version::supports_bmi2());
12444 match(Set dst (URShiftL (LoadL src) shift));
12445 ins_cost(175);
12446 format %{ "shrxq $dst, $src, $shift" %}
12447 ins_encode %{
12448 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12449 %}
12450 ins_pipe(ialu_reg_mem);
12451 %}
12452
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12455 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12456 %{
12457 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12458
12459 format %{ "movsbl $dst, $src\t# i2b" %}
12460 ins_encode %{
12461 __ movsbl($dst$$Register, $src$$Register);
12462 %}
12463 ins_pipe(ialu_reg_reg);
12464 %}
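
// A minimal Java-level sketch of source that reduces to this pattern
// (illustrative only, not part of this file):
//
//   static byte toByte(int x) { return (byte) x; }  // javac emits i2b; C2 sees (x << 24) >> 24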
12465
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12468 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12469 %{
12470 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12471
12472 format %{ "movswl $dst, $src\t# i2s" %}
12473 ins_encode %{
12474 __ movswl($dst$$Register, $src$$Register);
12475 %}
12476 ins_pipe(ialu_reg_reg);
12477 %}
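
// The corresponding Java-level sketch (illustrative only):
//
//   static short toShort(int x) { return (short) x; }  // javac emits i2s; C2 sees (x << 16) >> 16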
12478
12479 // ROL/ROR instructions
12480
12481 // Rotate left by constant.
12482 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12483 %{
12484 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12485 match(Set dst (RotateLeft dst shift));
12486 effect(KILL cr);
12487 format %{ "roll $dst, $shift" %}
12488 ins_encode %{
12489 __ roll($dst$$Register, $shift$$constant);
12490 %}
12491 ins_pipe(ialu_reg);
12492 %}
12493
12494 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12495 %{
12496 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12497 match(Set dst (RotateLeft src shift));
12498 format %{ "rolxl $dst, $src, $shift" %}
12499 ins_encode %{
12500 int shift = 32 - ($shift$$constant & 31);
12501 __ rorxl($dst$$Register, $src$$Register, shift);
12502 %}
12503 ins_pipe(ialu_reg_reg);
12504 %}
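
// BMI2 has rorx but no rolx, so a constant rotate-left is rewritten as a
// rotate-right by the complementary count. A Java sketch of the identity
// relied on here (illustrative only):
//
//   static int rolViaRor(int x, int n) {
//     return Integer.rotateRight(x, (32 - n) & 31);  // == Integer.rotateLeft(x, n)
//   }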
12505
12506 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12507 %{
12508 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12509 match(Set dst (RotateLeft (LoadI src) shift));
12510 ins_cost(175);
12511 format %{ "rolxl $dst, $src, $shift" %}
12512 ins_encode %{
12513 int shift = 32 - ($shift$$constant & 31);
12514 __ rorxl($dst$$Register, $src$$Address, shift);
12515 %}
12516 ins_pipe(ialu_reg_mem);
12517 %}
12518
12519 // Rotate Left by variable
12520 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12521 %{
12522 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12523 match(Set dst (RotateLeft dst shift));
12524 effect(KILL cr);
12525 format %{ "roll $dst, $shift" %}
12526 ins_encode %{
12527 __ roll($dst$$Register);
12528 %}
12529 ins_pipe(ialu_reg_reg);
12530 %}
12531
12532 // Rotate Left by variable
12533 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12534 %{
12535 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12536 match(Set dst (RotateLeft src shift));
12537 effect(KILL cr);
12538
12539 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12540 ins_encode %{
12541 __ eroll($dst$$Register, $src$$Register, false);
12542 %}
12543 ins_pipe(ialu_reg_reg);
12544 %}
12545
12546 // Rotate Right by constant.
12547 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12548 %{
12549 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12550 match(Set dst (RotateRight dst shift));
12551 effect(KILL cr);
12552 format %{ "rorl $dst, $shift" %}
12553 ins_encode %{
12554 __ rorl($dst$$Register, $shift$$constant);
12555 %}
12556 ins_pipe(ialu_reg);
12557 %}
12558
12559 // Rotate Right by constant.
12560 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12561 %{
12562 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12563 match(Set dst (RotateRight src shift));
12564 format %{ "rorxl $dst, $src, $shift" %}
12565 ins_encode %{
12566 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12567 %}
12568 ins_pipe(ialu_reg_reg);
12569 %}
12570
12571 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12572 %{
12573 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12574 match(Set dst (RotateRight (LoadI src) shift));
12575 ins_cost(175);
12576 format %{ "rorxl $dst, $src, $shift" %}
12577 ins_encode %{
12578 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12579 %}
12580 ins_pipe(ialu_reg_mem);
12581 %}
12582
12583 // Rotate Right by variable
12584 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12585 %{
12586 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12587 match(Set dst (RotateRight dst shift));
12588 effect(KILL cr);
12589 format %{ "rorl $dst, $shift" %}
12590 ins_encode %{
12591 __ rorl($dst$$Register);
12592 %}
12593 ins_pipe(ialu_reg_reg);
12594 %}
12595
12596 // Rotate Right by variable
12597 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12598 %{
12599 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12600 match(Set dst (RotateRight src shift));
12601 effect(KILL cr);
12602
format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12604 ins_encode %{
12605 __ erorl($dst$$Register, $src$$Register, false);
12606 %}
12607 ins_pipe(ialu_reg_reg);
12608 %}
12609
12610 // Rotate Left by constant.
12611 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12612 %{
12613 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12614 match(Set dst (RotateLeft dst shift));
12615 effect(KILL cr);
12616 format %{ "rolq $dst, $shift" %}
12617 ins_encode %{
12618 __ rolq($dst$$Register, $shift$$constant);
12619 %}
12620 ins_pipe(ialu_reg);
12621 %}
12622
12623 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12624 %{
12625 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12626 match(Set dst (RotateLeft src shift));
12627 format %{ "rolxq $dst, $src, $shift" %}
12628 ins_encode %{
12629 int shift = 64 - ($shift$$constant & 63);
12630 __ rorxq($dst$$Register, $src$$Register, shift);
12631 %}
12632 ins_pipe(ialu_reg_reg);
12633 %}
12634
12635 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12636 %{
12637 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12638 match(Set dst (RotateLeft (LoadL src) shift));
12639 ins_cost(175);
12640 format %{ "rolxq $dst, $src, $shift" %}
12641 ins_encode %{
12642 int shift = 64 - ($shift$$constant & 63);
12643 __ rorxq($dst$$Register, $src$$Address, shift);
12644 %}
12645 ins_pipe(ialu_reg_mem);
12646 %}
12647
12648 // Rotate Left by variable
12649 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12650 %{
12651 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12652 match(Set dst (RotateLeft dst shift));
12653 effect(KILL cr);
12654 format %{ "rolq $dst, $shift" %}
12655 ins_encode %{
12656 __ rolq($dst$$Register);
12657 %}
12658 ins_pipe(ialu_reg_reg);
12659 %}
12660
12661 // Rotate Left by variable
12662 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12663 %{
12664 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12665 match(Set dst (RotateLeft src shift));
12666 effect(KILL cr);
12667
format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12669 ins_encode %{
12670 __ erolq($dst$$Register, $src$$Register, false);
12671 %}
12672 ins_pipe(ialu_reg_reg);
12673 %}
12674
12675 // Rotate Right by constant.
12676 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12677 %{
12678 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12679 match(Set dst (RotateRight dst shift));
12680 effect(KILL cr);
12681 format %{ "rorq $dst, $shift" %}
12682 ins_encode %{
12683 __ rorq($dst$$Register, $shift$$constant);
12684 %}
12685 ins_pipe(ialu_reg);
12686 %}
12687
12688 // Rotate Right by constant
12689 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12690 %{
12691 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12692 match(Set dst (RotateRight src shift));
12693 format %{ "rorxq $dst, $src, $shift" %}
12694 ins_encode %{
12695 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12696 %}
12697 ins_pipe(ialu_reg_reg);
12698 %}
12699
12700 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12701 %{
12702 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12703 match(Set dst (RotateRight (LoadL src) shift));
12704 ins_cost(175);
12705 format %{ "rorxq $dst, $src, $shift" %}
12706 ins_encode %{
12707 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12708 %}
12709 ins_pipe(ialu_reg_mem);
12710 %}
12711
12712 // Rotate Right by variable
12713 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12714 %{
12715 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12716 match(Set dst (RotateRight dst shift));
12717 effect(KILL cr);
12718 format %{ "rorq $dst, $shift" %}
12719 ins_encode %{
12720 __ rorq($dst$$Register);
12721 %}
12722 ins_pipe(ialu_reg_reg);
12723 %}
12724
12725 // Rotate Right by variable
12726 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12727 %{
12728 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12729 match(Set dst (RotateRight src shift));
12730 effect(KILL cr);
12731
format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12733 ins_encode %{
12734 __ erorq($dst$$Register, $src$$Register, false);
12735 %}
12736 ins_pipe(ialu_reg_reg);
12737 %}
12738
12739 //----------------------------- CompressBits/ExpandBits ------------------------
12740
12741 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12742 predicate(n->bottom_type()->isa_long());
12743 match(Set dst (CompressBits src mask));
12744 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12745 ins_encode %{
12746 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12747 %}
12748 ins_pipe( pipe_slow );
12749 %}
12750
12751 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12752 predicate(n->bottom_type()->isa_long());
12753 match(Set dst (ExpandBits src mask));
12754 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12755 ins_encode %{
12756 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12757 %}
12758 ins_pipe( pipe_slow );
12759 %}
12760
12761 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12762 predicate(n->bottom_type()->isa_long());
12763 match(Set dst (CompressBits src (LoadL mask)));
12764 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12765 ins_encode %{
12766 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12767 %}
12768 ins_pipe( pipe_slow );
12769 %}
12770
12771 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12772 predicate(n->bottom_type()->isa_long());
12773 match(Set dst (ExpandBits src (LoadL mask)));
12774 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12775 ins_encode %{
12776 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12777 %}
12778 ins_pipe( pipe_slow );
12779 %}
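
// CompressBits/ExpandBits nodes typically come from the Long.compress and
// Long.expand library methods. A Java sketch (illustrative only):
//
//   long packed   = Long.compress(src, mask);  // gather the mask-selected bits of src  -> pextq
//   long restored = Long.expand(packed, mask); // scatter low bits back into mask slots -> pdepq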
12780
12781
12782 // Logical Instructions
12783
12784 // Integer Logical Instructions
12785
12786 // And Instructions
12787 // And Register with Register
12788 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12789 %{
12790 predicate(!UseAPX);
12791 match(Set dst (AndI dst src));
12792 effect(KILL cr);
12793 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12794
12795 format %{ "andl $dst, $src\t# int" %}
12796 ins_encode %{
12797 __ andl($dst$$Register, $src$$Register);
12798 %}
12799 ins_pipe(ialu_reg_reg);
12800 %}
12801
12802 // And Register with Register using New Data Destination (NDD)
12803 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12804 %{
12805 predicate(UseAPX);
12806 match(Set dst (AndI src1 src2));
12807 effect(KILL cr);
12808 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12809
12810 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12811 ins_encode %{
__ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
12815 ins_pipe(ialu_reg_reg);
12816 %}
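
// An NDD form is a three-operand APX encoding: it writes $dst without
// destroying either source, so the allocator need not copy src1 into dst
// first. Roughly (illustrative only):
//
//   legacy: movl dst, src1; andl dst, src2   vs.   ndd: eandl dst, src1, src2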
12817
12818 // And Register with Immediate 255
12819 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12820 %{
12821 match(Set dst (AndI src mask));
12822
12823 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12824 ins_encode %{
12825 __ movzbl($dst$$Register, $src$$Register);
12826 %}
12827 ins_pipe(ialu_reg);
12828 %}
12829
12830 // And Register with Immediate 255 and promote to long
12831 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12832 %{
12833 match(Set dst (ConvI2L (AndI src mask)));
12834
12835 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12836 ins_encode %{
12837 __ movzbl($dst$$Register, $src$$Register);
12838 %}
12839 ins_pipe(ialu_reg);
12840 %}
12841
12842 // And Register with Immediate 65535
12843 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12844 %{
12845 match(Set dst (AndI src mask));
12846
12847 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12848 ins_encode %{
12849 __ movzwl($dst$$Register, $src$$Register);
12850 %}
12851 ins_pipe(ialu_reg);
12852 %}
12853
12854 // And Register with Immediate 65535 and promote to long
12855 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12856 %{
12857 match(Set dst (ConvI2L (AndI src mask)));
12858
12859 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12860 ins_encode %{
12861 __ movzwl($dst$$Register, $src$$Register);
12862 %}
12863 ins_pipe(ialu_reg);
12864 %}
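
// Masking with 0xFF or 0xFFFF is just a zero-extension, so the four rules
// above need no and-immediate and no separate widening. A Java sketch
// (illustrative only):
//
//   int  lo8  = x & 0xFF;            // movzbl
//   long lo16 = (long) (x & 0xFFFF); // a single movzwl: the result is already zero-extended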
12865
12866 // Can skip int2long conversions after AND with small bitmask
12867 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12868 %{
12869 predicate(VM_Version::supports_bmi2());
12870 ins_cost(125);
12871 effect(TEMP tmp, KILL cr);
12872 match(Set dst (ConvI2L (AndI src mask)));
12873 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12874 ins_encode %{
12875 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12876 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12877 %}
12878 ins_pipe(ialu_reg_reg);
12879 %}
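
// bzhi zeroes every bit at or above a given index, so an AND with a constant
// (2^k - 1) followed by int->long widening needs no extra sign handling.
// Java sketch (illustrative only):
//
//   static long low10Bits(int x) { return x & 0x3FF; }  // movl tmp, 10; bzhiq dst, x, tmp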
12880
12881 // And Register with Immediate
12882 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12883 %{
12884 predicate(!UseAPX);
12885 match(Set dst (AndI dst src));
12886 effect(KILL cr);
12887 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12888
12889 format %{ "andl $dst, $src\t# int" %}
12890 ins_encode %{
12891 __ andl($dst$$Register, $src$$constant);
12892 %}
12893 ins_pipe(ialu_reg);
12894 %}
12895
12896 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12897 %{
12898 predicate(UseAPX);
12899 match(Set dst (AndI src1 src2));
12900 effect(KILL cr);
12901 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12902
12903 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12904 ins_encode %{
12905 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12906 %}
12907 ins_pipe(ialu_reg);
12908 %}
12909
12910 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12911 %{
12912 predicate(UseAPX);
12913 match(Set dst (AndI (LoadI src1) src2));
12914 effect(KILL cr);
12915 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12916
12917 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12918 ins_encode %{
12919 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12920 %}
12921 ins_pipe(ialu_reg);
12922 %}
12923
12924 // And Register with Memory
12925 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12926 %{
12927 predicate(!UseAPX);
12928 match(Set dst (AndI dst (LoadI src)));
12929 effect(KILL cr);
12930 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12931
12932 ins_cost(150);
12933 format %{ "andl $dst, $src\t# int" %}
12934 ins_encode %{
12935 __ andl($dst$$Register, $src$$Address);
12936 %}
12937 ins_pipe(ialu_reg_mem);
12938 %}
12939
12940 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12941 %{
12942 predicate(UseAPX);
12943 match(Set dst (AndI src1 (LoadI src2)));
12944 effect(KILL cr);
12945 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12946
12947 ins_cost(150);
12948 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12949 ins_encode %{
12950 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12951 %}
12952 ins_pipe(ialu_reg_mem);
12953 %}
12954
12955 // And Memory with Register
12956 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12957 %{
12958 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12959 effect(KILL cr);
12960 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12961
12962 ins_cost(150);
12963 format %{ "andb $dst, $src\t# byte" %}
12964 ins_encode %{
12965 __ andb($dst$$Address, $src$$Register);
12966 %}
12967 ins_pipe(ialu_mem_reg);
12968 %}
12969
12970 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12971 %{
12972 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12973 effect(KILL cr);
12974 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12975
12976 ins_cost(150);
12977 format %{ "andl $dst, $src\t# int" %}
12978 ins_encode %{
12979 __ andl($dst$$Address, $src$$Register);
12980 %}
12981 ins_pipe(ialu_mem_reg);
12982 %}
12983
12984 // And Memory with Immediate
12985 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12986 %{
12987 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12988 effect(KILL cr);
12989 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12990
12991 ins_cost(125);
12992 format %{ "andl $dst, $src\t# int" %}
12993 ins_encode %{
12994 __ andl($dst$$Address, $src$$constant);
12995 %}
12996 ins_pipe(ialu_mem_imm);
12997 %}
12998
12999 // BMI1 instructions
13000 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13001 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13002 predicate(UseBMI1Instructions);
13003 effect(KILL cr);
13004 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13005
13006 ins_cost(125);
13007 format %{ "andnl $dst, $src1, $src2" %}
13008
13009 ins_encode %{
13010 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13011 %}
13012 ins_pipe(ialu_reg_mem);
13013 %}
13014
13015 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13016 match(Set dst (AndI (XorI src1 minus_1) src2));
13017 predicate(UseBMI1Instructions);
13018 effect(KILL cr);
13019 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13020
13021 format %{ "andnl $dst, $src1, $src2" %}
13022
13023 ins_encode %{
13024 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13025 %}
13026 ins_pipe(ialu_reg);
13027 %}
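
// andn computes ~a & b in a single instruction; C2 represents bitwise-not as
// XOR with -1, which is the shape matched above. Java sketch (illustrative only):
//
//   static int andNot(int a, int b) { return ~a & b; }  // -> andnl dst, a, b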
13028
13029 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13030 match(Set dst (AndI (SubI imm_zero src) src));
13031 predicate(UseBMI1Instructions);
13032 effect(KILL cr);
13033 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13034
13035 format %{ "blsil $dst, $src" %}
13036
13037 ins_encode %{
13038 __ blsil($dst$$Register, $src$$Register);
13039 %}
13040 ins_pipe(ialu_reg);
13041 %}
13042
13043 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13044 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13045 predicate(UseBMI1Instructions);
13046 effect(KILL cr);
13047 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13048
13049 ins_cost(125);
13050 format %{ "blsil $dst, $src" %}
13051
13052 ins_encode %{
13053 __ blsil($dst$$Register, $src$$Address);
13054 %}
13055 ins_pipe(ialu_reg_mem);
13056 %}
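
// blsi isolates the lowest set bit via the two's-complement identity x & -x.
// Java sketch (illustrative only):
//
//   static int lowest(int x) { return x & -x; }  // what Integer.lowestOneBit(x) computes -> blsil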
13057
13058 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13059 %{
13060 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13061 predicate(UseBMI1Instructions);
13062 effect(KILL cr);
13063 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13064
13065 ins_cost(125);
13066 format %{ "blsmskl $dst, $src" %}
13067
13068 ins_encode %{
13069 __ blsmskl($dst$$Register, $src$$Address);
13070 %}
13071 ins_pipe(ialu_reg_mem);
13072 %}
13073
13074 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13075 %{
13076 match(Set dst (XorI (AddI src minus_1) src));
13077 predicate(UseBMI1Instructions);
13078 effect(KILL cr);
13079 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13080
13081 format %{ "blsmskl $dst, $src" %}
13082
13083 ins_encode %{
13084 __ blsmskl($dst$$Register, $src$$Register);
13085 %}
13086
13087 ins_pipe(ialu_reg);
13088 %}
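
// blsmsk builds a mask of the bits up to and including the lowest set bit,
// i.e. x ^ (x - 1). Java sketch (illustrative only):
//
//   static int maskUpToLowest(int x) { return x ^ (x - 1); }  // -> blsmskl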
13089
13090 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13091 %{
13092 match(Set dst (AndI (AddI src minus_1) src) );
13093 predicate(UseBMI1Instructions);
13094 effect(KILL cr);
13095 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13096
13097 format %{ "blsrl $dst, $src" %}
13098
13099 ins_encode %{
13100 __ blsrl($dst$$Register, $src$$Register);
13101 %}
13102
ins_pipe(ialu_reg);
13104 %}
13105
13106 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13107 %{
13108 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13109 predicate(UseBMI1Instructions);
13110 effect(KILL cr);
13111 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13112
13113 ins_cost(125);
13114 format %{ "blsrl $dst, $src" %}
13115
13116 ins_encode %{
13117 __ blsrl($dst$$Register, $src$$Address);
13118 %}
13119
ins_pipe(ialu_reg_mem);
13121 %}
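
// blsr clears the lowest set bit, x & (x - 1): the inner step of the classic
// population-count loop. Java sketch (illustrative only):
//
//   static int dropLowest(int x) { return x & (x - 1); }  // -> blsrl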
13122
13123 // Or Instructions
13124 // Or Register with Register
13125 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13126 %{
13127 predicate(!UseAPX);
13128 match(Set dst (OrI dst src));
13129 effect(KILL cr);
13130 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13131
13132 format %{ "orl $dst, $src\t# int" %}
13133 ins_encode %{
13134 __ orl($dst$$Register, $src$$Register);
13135 %}
13136 ins_pipe(ialu_reg_reg);
13137 %}
13138
13139 // Or Register with Register using New Data Destination (NDD)
13140 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13141 %{
13142 predicate(UseAPX);
13143 match(Set dst (OrI src1 src2));
13144 effect(KILL cr);
13145 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13146
13147 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13148 ins_encode %{
13149 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13150 %}
13151 ins_pipe(ialu_reg_reg);
13152 %}
13153
13154 // Or Register with Immediate
13155 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13156 %{
13157 predicate(!UseAPX);
13158 match(Set dst (OrI dst src));
13159 effect(KILL cr);
13160 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13161
13162 format %{ "orl $dst, $src\t# int" %}
13163 ins_encode %{
13164 __ orl($dst$$Register, $src$$constant);
13165 %}
13166 ins_pipe(ialu_reg);
13167 %}
13168
13169 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13170 %{
13171 predicate(UseAPX);
13172 match(Set dst (OrI src1 src2));
13173 effect(KILL cr);
13174 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13175
13176 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13177 ins_encode %{
13178 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13179 %}
13180 ins_pipe(ialu_reg);
13181 %}
13182
13183 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13184 %{
13185 predicate(UseAPX);
13186 match(Set dst (OrI src1 src2));
13187 effect(KILL cr);
13188 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13189
13190 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13191 ins_encode %{
13192 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13193 %}
13194 ins_pipe(ialu_reg);
13195 %}
13196
13197 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13198 %{
13199 predicate(UseAPX);
13200 match(Set dst (OrI (LoadI src1) src2));
13201 effect(KILL cr);
13202 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13203
13204 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13205 ins_encode %{
13206 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13207 %}
13208 ins_pipe(ialu_reg);
13209 %}
13210
13211 // Or Register with Memory
13212 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13213 %{
13214 predicate(!UseAPX);
13215 match(Set dst (OrI dst (LoadI src)));
13216 effect(KILL cr);
13217 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13218
13219 ins_cost(150);
13220 format %{ "orl $dst, $src\t# int" %}
13221 ins_encode %{
13222 __ orl($dst$$Register, $src$$Address);
13223 %}
13224 ins_pipe(ialu_reg_mem);
13225 %}
13226
13227 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13228 %{
13229 predicate(UseAPX);
13230 match(Set dst (OrI src1 (LoadI src2)));
13231 effect(KILL cr);
13232 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13233
13234 ins_cost(150);
13235 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13236 ins_encode %{
13237 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13238 %}
13239 ins_pipe(ialu_reg_mem);
13240 %}
13241
13242 // Or Memory with Register
13243 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13244 %{
13245 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13246 effect(KILL cr);
13247 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13248
13249 ins_cost(150);
13250 format %{ "orb $dst, $src\t# byte" %}
13251 ins_encode %{
13252 __ orb($dst$$Address, $src$$Register);
13253 %}
13254 ins_pipe(ialu_mem_reg);
13255 %}
13256
13257 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13258 %{
13259 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13260 effect(KILL cr);
13261 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13262
13263 ins_cost(150);
13264 format %{ "orl $dst, $src\t# int" %}
13265 ins_encode %{
13266 __ orl($dst$$Address, $src$$Register);
13267 %}
13268 ins_pipe(ialu_mem_reg);
13269 %}
13270
13271 // Or Memory with Immediate
13272 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13273 %{
13274 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13275 effect(KILL cr);
13276 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13277
13278 ins_cost(125);
13279 format %{ "orl $dst, $src\t# int" %}
13280 ins_encode %{
13281 __ orl($dst$$Address, $src$$constant);
13282 %}
13283 ins_pipe(ialu_mem_imm);
13284 %}
13285
13286 // Xor Instructions
13287 // Xor Register with Register
13288 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13289 %{
13290 predicate(!UseAPX);
13291 match(Set dst (XorI dst src));
13292 effect(KILL cr);
13293 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13294
13295 format %{ "xorl $dst, $src\t# int" %}
13296 ins_encode %{
13297 __ xorl($dst$$Register, $src$$Register);
13298 %}
13299 ins_pipe(ialu_reg_reg);
13300 %}
13301
13302 // Xor Register with Register using New Data Destination (NDD)
13303 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13304 %{
13305 predicate(UseAPX);
13306 match(Set dst (XorI src1 src2));
13307 effect(KILL cr);
13308 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13309
13310 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13311 ins_encode %{
13312 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13313 %}
13314 ins_pipe(ialu_reg_reg);
13315 %}
13316
13317 // Xor Register with Immediate -1
13318 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13319 %{
13320 predicate(!UseAPX);
13321 match(Set dst (XorI dst imm));
13322
13323 format %{ "notl $dst" %}
13324 ins_encode %{
13325 __ notl($dst$$Register);
13326 %}
13327 ins_pipe(ialu_reg);
13328 %}
13329
13330 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13331 %{
13332 match(Set dst (XorI src imm));
13333 predicate(UseAPX);
13334
13335 format %{ "enotl $dst, $src" %}
13336 ins_encode %{
13337 __ enotl($dst$$Register, $src$$Register);
13338 %}
13339 ins_pipe(ialu_reg);
13340 %}
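
// XOR with -1 is bitwise NOT; not/enot leave the flags untouched, which is
// why these two rules carry no KILL cr. Java sketch (illustrative only):
//
//   static int flip(int x) { return x ^ -1; }  // same as ~x -> notl (enotl dst, src under APX)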
13341
13342 // Xor Register with Immediate
13343 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13344 %{
13345 // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13346 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13347 match(Set dst (XorI dst src));
13348 effect(KILL cr);
13349 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13350
13351 format %{ "xorl $dst, $src\t# int" %}
13352 ins_encode %{
13353 __ xorl($dst$$Register, $src$$constant);
13354 %}
13355 ins_pipe(ialu_reg);
13356 %}
13357
13358 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13359 %{
13360 // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13361 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13362 match(Set dst (XorI src1 src2));
13363 effect(KILL cr);
13364 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13365
13366 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13367 ins_encode %{
13368 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13369 %}
13370 ins_pipe(ialu_reg);
13371 %}
13372
13373 // Xor Memory with Immediate
13374 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13375 %{
13376 predicate(UseAPX);
13377 match(Set dst (XorI (LoadI src1) src2));
13378 effect(KILL cr);
13379 ins_cost(150);
13380 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13381
13382 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13383 ins_encode %{
13384 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13385 %}
13386 ins_pipe(ialu_reg);
13387 %}
13388
13389 // Xor Register with Memory
13390 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13391 %{
13392 predicate(!UseAPX);
13393 match(Set dst (XorI dst (LoadI src)));
13394 effect(KILL cr);
13395 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13396
13397 ins_cost(150);
13398 format %{ "xorl $dst, $src\t# int" %}
13399 ins_encode %{
13400 __ xorl($dst$$Register, $src$$Address);
13401 %}
13402 ins_pipe(ialu_reg_mem);
13403 %}
13404
13405 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13406 %{
13407 predicate(UseAPX);
13408 match(Set dst (XorI src1 (LoadI src2)));
13409 effect(KILL cr);
13410 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13411
13412 ins_cost(150);
13413 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13414 ins_encode %{
13415 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13416 %}
13417 ins_pipe(ialu_reg_mem);
13418 %}
13419
13420 // Xor Memory with Register
13421 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13422 %{
13423 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13424 effect(KILL cr);
13425 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13426
13427 ins_cost(150);
13428 format %{ "xorb $dst, $src\t# byte" %}
13429 ins_encode %{
13430 __ xorb($dst$$Address, $src$$Register);
13431 %}
13432 ins_pipe(ialu_mem_reg);
13433 %}
13434
13435 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13436 %{
13437 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13438 effect(KILL cr);
13439 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13440
13441 ins_cost(150);
13442 format %{ "xorl $dst, $src\t# int" %}
13443 ins_encode %{
13444 __ xorl($dst$$Address, $src$$Register);
13445 %}
13446 ins_pipe(ialu_mem_reg);
13447 %}
13448
13449 // Xor Memory with Immediate
13450 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13451 %{
13452 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13453 effect(KILL cr);
13454 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13455
13456 ins_cost(125);
13457 format %{ "xorl $dst, $src\t# int" %}
13458 ins_encode %{
13459 __ xorl($dst$$Address, $src$$constant);
13460 %}
13461 ins_pipe(ialu_mem_imm);
13462 %}
13463
13464
13465 // Long Logical Instructions
13466
13467 // And Instructions
13468 // And Register with Register
13469 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13470 %{
13471 predicate(!UseAPX);
13472 match(Set dst (AndL dst src));
13473 effect(KILL cr);
13474 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13475
13476 format %{ "andq $dst, $src\t# long" %}
13477 ins_encode %{
13478 __ andq($dst$$Register, $src$$Register);
13479 %}
13480 ins_pipe(ialu_reg_reg);
13481 %}
13482
13483 // And Register with Register using New Data Destination (NDD)
13484 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13485 %{
13486 predicate(UseAPX);
13487 match(Set dst (AndL src1 src2));
13488 effect(KILL cr);
13489 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13490
13491 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13492 ins_encode %{
__ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13496 ins_pipe(ialu_reg_reg);
13497 %}
13498
13499 // And Register with Immediate 255
13500 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13501 %{
13502 match(Set dst (AndL src mask));
13503
13504 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13505 ins_encode %{
// movzbl zeroes out the upper 32 bits and does not need REX.W
13507 __ movzbl($dst$$Register, $src$$Register);
13508 %}
13509 ins_pipe(ialu_reg);
13510 %}
13511
13512 // And Register with Immediate 65535
13513 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13514 %{
13515 match(Set dst (AndL src mask));
13516
13517 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13518 ins_encode %{
// movzwl zeroes out the upper 32 bits and does not need REX.W
13520 __ movzwl($dst$$Register, $src$$Register);
13521 %}
13522 ins_pipe(ialu_reg);
13523 %}
13524
13525 // And Register with Immediate
13526 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13527 %{
13528 predicate(!UseAPX);
13529 match(Set dst (AndL dst src));
13530 effect(KILL cr);
13531 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13532
13533 format %{ "andq $dst, $src\t# long" %}
13534 ins_encode %{
13535 __ andq($dst$$Register, $src$$constant);
13536 %}
13537 ins_pipe(ialu_reg);
13538 %}
13539
13540 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13541 %{
13542 predicate(UseAPX);
13543 match(Set dst (AndL src1 src2));
13544 effect(KILL cr);
13545 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13546
13547 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13548 ins_encode %{
13549 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13550 %}
13551 ins_pipe(ialu_reg);
13552 %}
13553
13554 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13555 %{
13556 predicate(UseAPX);
13557 match(Set dst (AndL (LoadL src1) src2));
13558 effect(KILL cr);
13559 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13560
13561 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13562 ins_encode %{
13563 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13564 %}
13565 ins_pipe(ialu_reg);
13566 %}
13567
13568 // And Register with Memory
13569 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13570 %{
13571 predicate(!UseAPX);
13572 match(Set dst (AndL dst (LoadL src)));
13573 effect(KILL cr);
13574 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13575
13576 ins_cost(150);
13577 format %{ "andq $dst, $src\t# long" %}
13578 ins_encode %{
13579 __ andq($dst$$Register, $src$$Address);
13580 %}
13581 ins_pipe(ialu_reg_mem);
13582 %}
13583
13584 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13585 %{
13586 predicate(UseAPX);
13587 match(Set dst (AndL src1 (LoadL src2)));
13588 effect(KILL cr);
13589 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13590
13591 ins_cost(150);
13592 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13593 ins_encode %{
13594 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13595 %}
13596 ins_pipe(ialu_reg_mem);
13597 %}
13598
13599 // And Memory with Register
13600 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13601 %{
13602 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13603 effect(KILL cr);
13604 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13605
13606 ins_cost(150);
13607 format %{ "andq $dst, $src\t# long" %}
13608 ins_encode %{
13609 __ andq($dst$$Address, $src$$Register);
13610 %}
13611 ins_pipe(ialu_mem_reg);
13612 %}
13613
13614 // And Memory with Immediate
13615 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13616 %{
13617 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13618 effect(KILL cr);
13619 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13620
13621 ins_cost(125);
13622 format %{ "andq $dst, $src\t# long" %}
13623 ins_encode %{
13624 __ andq($dst$$Address, $src$$constant);
13625 %}
13626 ins_pipe(ialu_mem_imm);
13627 %}
13628
13629 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13630 %{
// con must be a genuine 64-bit immediate given that not(con) is a power of 2,
// because a plain AND/OR immediate already covers the 8/32-bit cases.
13633 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13634
13635 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13636 effect(KILL cr);
13637
13638 ins_cost(125);
13639 format %{ "btrq $dst, log2(not($con))\t# long" %}
13640 ins_encode %{
13641 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13642 %}
13643 ins_pipe(ialu_mem_imm);
13644 %}
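
// Clearing a single bit above bit 30 would otherwise need a 64-bit AND
// immediate (andq accepts only imm32); btrq encodes just the bit index.
// Java sketch with a hypothetical static field (illustrative only):
//
//   static long flags;
//   static void clearBit40() { flags &= ~(1L << 40); }  // -> btrq [flags], 40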
13645
13646 // BMI1 instructions
13647 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13648 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13649 predicate(UseBMI1Instructions);
13650 effect(KILL cr);
13651 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13652
13653 ins_cost(125);
13654 format %{ "andnq $dst, $src1, $src2" %}
13655
13656 ins_encode %{
13657 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13658 %}
13659 ins_pipe(ialu_reg_mem);
13660 %}
13661
13662 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13663 match(Set dst (AndL (XorL src1 minus_1) src2));
13664 predicate(UseBMI1Instructions);
13665 effect(KILL cr);
13666 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13667
13668 format %{ "andnq $dst, $src1, $src2" %}
13669
13670 ins_encode %{
13671 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13672 %}
ins_pipe(ialu_reg);
13674 %}
13675
13676 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13677 match(Set dst (AndL (SubL imm_zero src) src));
13678 predicate(UseBMI1Instructions);
13679 effect(KILL cr);
13680 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13681
13682 format %{ "blsiq $dst, $src" %}
13683
13684 ins_encode %{
13685 __ blsiq($dst$$Register, $src$$Register);
13686 %}
13687 ins_pipe(ialu_reg);
13688 %}
13689
13690 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13691 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13692 predicate(UseBMI1Instructions);
13693 effect(KILL cr);
13694 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13695
13696 ins_cost(125);
13697 format %{ "blsiq $dst, $src" %}
13698
13699 ins_encode %{
13700 __ blsiq($dst$$Register, $src$$Address);
13701 %}
13702 ins_pipe(ialu_reg_mem);
13703 %}
13704
13705 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13706 %{
13707 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13708 predicate(UseBMI1Instructions);
13709 effect(KILL cr);
13710 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13711
13712 ins_cost(125);
13713 format %{ "blsmskq $dst, $src" %}
13714
13715 ins_encode %{
13716 __ blsmskq($dst$$Register, $src$$Address);
13717 %}
13718 ins_pipe(ialu_reg_mem);
13719 %}
13720
13721 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13722 %{
13723 match(Set dst (XorL (AddL src minus_1) src));
13724 predicate(UseBMI1Instructions);
13725 effect(KILL cr);
13726 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13727
13728 format %{ "blsmskq $dst, $src" %}
13729
13730 ins_encode %{
13731 __ blsmskq($dst$$Register, $src$$Register);
13732 %}
13733
13734 ins_pipe(ialu_reg);
13735 %}
13736
13737 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13738 %{
13739 match(Set dst (AndL (AddL src minus_1) src) );
13740 predicate(UseBMI1Instructions);
13741 effect(KILL cr);
13742 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13743
13744 format %{ "blsrq $dst, $src" %}
13745
13746 ins_encode %{
13747 __ blsrq($dst$$Register, $src$$Register);
13748 %}
13749
13750 ins_pipe(ialu_reg);
13751 %}
13752
13753 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13754 %{
13755 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13756 predicate(UseBMI1Instructions);
13757 effect(KILL cr);
13758 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13759
13760 ins_cost(125);
13761 format %{ "blsrq $dst, $src" %}
13762
13763 ins_encode %{
13764 __ blsrq($dst$$Register, $src$$Address);
13765 %}
13766
ins_pipe(ialu_reg_mem);
13768 %}
13769
13770 // Or Instructions
13771 // Or Register with Register
13772 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13773 %{
13774 predicate(!UseAPX);
13775 match(Set dst (OrL dst src));
13776 effect(KILL cr);
13777 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13778
13779 format %{ "orq $dst, $src\t# long" %}
13780 ins_encode %{
13781 __ orq($dst$$Register, $src$$Register);
13782 %}
13783 ins_pipe(ialu_reg_reg);
13784 %}
13785
13786 // Or Register with Register using New Data Destination (NDD)
13787 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13788 %{
13789 predicate(UseAPX);
13790 match(Set dst (OrL src1 src2));
13791 effect(KILL cr);
13792 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13793
13794 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13795 ins_encode %{
__ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13799 ins_pipe(ialu_reg_reg);
13800 %}
13801
13802 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
predicate(!UseAPX);
match(Set dst (OrL dst (CastP2X src)));
13805 effect(KILL cr);
13806 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13807
13808 format %{ "orq $dst, $src\t# long" %}
13809 ins_encode %{
13810 __ orq($dst$$Register, $src$$Register);
13811 %}
13812 ins_pipe(ialu_reg_reg);
13813 %}
13814
13815 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13817 effect(KILL cr);
13818 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13819
13820 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13821 ins_encode %{
13822 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13823 %}
13824 ins_pipe(ialu_reg_reg);
13825 %}
13826
13827 // Or Register with Immediate
13828 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13829 %{
13830 predicate(!UseAPX);
13831 match(Set dst (OrL dst src));
13832 effect(KILL cr);
13833 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13834
13835 format %{ "orq $dst, $src\t# long" %}
13836 ins_encode %{
13837 __ orq($dst$$Register, $src$$constant);
13838 %}
13839 ins_pipe(ialu_reg);
13840 %}
13841
13842 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13843 %{
13844 predicate(UseAPX);
13845 match(Set dst (OrL src1 src2));
13846 effect(KILL cr);
13847 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13848
13849 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13850 ins_encode %{
13851 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13852 %}
13853 ins_pipe(ialu_reg);
13854 %}
13855
13856 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13857 %{
13858 predicate(UseAPX);
13859 match(Set dst (OrL src1 src2));
13860 effect(KILL cr);
13861 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13862
13863 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13864 ins_encode %{
13865 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13866 %}
13867 ins_pipe(ialu_reg);
13868 %}
13869
13870 // Or Memory with Immediate
13871 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13872 %{
13873 predicate(UseAPX);
13874 match(Set dst (OrL (LoadL src1) src2));
13875 effect(KILL cr);
13876 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13877
13878 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13879 ins_encode %{
13880 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13881 %}
13882 ins_pipe(ialu_reg);
13883 %}
13884
13885 // Or Register with Memory
13886 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13887 %{
13888 predicate(!UseAPX);
13889 match(Set dst (OrL dst (LoadL src)));
13890 effect(KILL cr);
13891 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13892
13893 ins_cost(150);
13894 format %{ "orq $dst, $src\t# long" %}
13895 ins_encode %{
13896 __ orq($dst$$Register, $src$$Address);
13897 %}
13898 ins_pipe(ialu_reg_mem);
13899 %}
13900
13901 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13902 %{
13903 predicate(UseAPX);
13904 match(Set dst (OrL src1 (LoadL src2)));
13905 effect(KILL cr);
13906 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13907
13908 ins_cost(150);
13909 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13910 ins_encode %{
13911 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13912 %}
13913 ins_pipe(ialu_reg_mem);
13914 %}
13915
13916 // Or Memory with Register
13917 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13918 %{
13919 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13920 effect(KILL cr);
13921 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13922
13923 ins_cost(150);
13924 format %{ "orq $dst, $src\t# long" %}
13925 ins_encode %{
13926 __ orq($dst$$Address, $src$$Register);
13927 %}
13928 ins_pipe(ialu_mem_reg);
13929 %}
13930
13931 // Or Memory with Immediate
13932 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13933 %{
13934 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13935 effect(KILL cr);
13936 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13937
13938 ins_cost(125);
13939 format %{ "orq $dst, $src\t# long" %}
13940 ins_encode %{
13941 __ orq($dst$$Address, $src$$constant);
13942 %}
13943 ins_pipe(ialu_mem_imm);
13944 %}
13945
13946 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13947 %{
  // con must be a power of 2 whose set bit lies above bit 31; for bits
  // reachable with an 8/32-bit immediate, a plain OR is already optimal.
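  // For example, x |= (1L << 40) becomes "btsq [x], 40" instead of
  // materializing the 64-bit immediate in a temporary register first.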
13950 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13951
13952 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13953 effect(KILL cr);
13954
13955 ins_cost(125);
13956 format %{ "btsq $dst, log2($con)\t# long" %}
13957 ins_encode %{
13958 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13959 %}
13960 ins_pipe(ialu_mem_imm);
13961 %}
13962
13963 // Xor Instructions
13964 // Xor Register with Register
13965 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13966 %{
13967 predicate(!UseAPX);
13968 match(Set dst (XorL dst src));
13969 effect(KILL cr);
13970 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13971
13972 format %{ "xorq $dst, $src\t# long" %}
13973 ins_encode %{
13974 __ xorq($dst$$Register, $src$$Register);
13975 %}
13976 ins_pipe(ialu_reg_reg);
13977 %}
13978
13979 // Xor Register with Register using New Data Destination (NDD)
13980 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13981 %{
13982 predicate(UseAPX);
13983 match(Set dst (XorL src1 src2));
13984 effect(KILL cr);
13985 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986
13987 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
13988 ins_encode %{
13989 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13990 %}
13991 ins_pipe(ialu_reg_reg);
13992 %}
13993
13994 // Xor Register with Immediate -1
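// x ^ -1 == ~x, so a single notq suffices; NOT leaves the flags intact,
// which is why no KILL cr effect is needed here.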
13995 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
13996 %{
13997 predicate(!UseAPX);
13998 match(Set dst (XorL dst imm));
13999
14000 format %{ "notq $dst" %}
14001 ins_encode %{
14002 __ notq($dst$$Register);
14003 %}
14004 ins_pipe(ialu_reg);
14005 %}
14006
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14008 %{
14009 predicate(UseAPX);
14010 match(Set dst (XorL src imm));
14011
14012 format %{ "enotq $dst, $src" %}
14013 ins_encode %{
14014 __ enotq($dst$$Register, $src$$Register);
14015 %}
14016 ins_pipe(ialu_reg);
14017 %}
14018
14019 // Xor Register with Immediate
14020 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14021 %{
  // Strict predicate check: exclude -1 so that xorL_rReg_im1 is selected
  // for it regardless of cost.
14023 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14024 match(Set dst (XorL dst src));
14025 effect(KILL cr);
14026 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14027
14028 format %{ "xorq $dst, $src\t# long" %}
14029 ins_encode %{
14030 __ xorq($dst$$Register, $src$$constant);
14031 %}
14032 ins_pipe(ialu_reg);
14033 %}
14034
14035 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14036 %{
  // Strict predicate check: exclude -1 so that xorL_rReg_im1_ndd is
  // selected for it regardless of cost.
14038 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14039 match(Set dst (XorL src1 src2));
14040 effect(KILL cr);
14041 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14042
14043 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14044 ins_encode %{
14045 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14046 %}
14047 ins_pipe(ialu_reg);
14048 %}
14049
14050 // Xor Memory with Immediate
14051 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14052 %{
14053 predicate(UseAPX);
14054 match(Set dst (XorL (LoadL src1) src2));
14055 effect(KILL cr);
14056 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14057 ins_cost(150);
14058
14059 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14060 ins_encode %{
14061 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14062 %}
14063 ins_pipe(ialu_reg);
14064 %}
14065
14066 // Xor Register with Memory
14067 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14068 %{
14069 predicate(!UseAPX);
14070 match(Set dst (XorL dst (LoadL src)));
14071 effect(KILL cr);
14072 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14073
14074 ins_cost(150);
14075 format %{ "xorq $dst, $src\t# long" %}
14076 ins_encode %{
14077 __ xorq($dst$$Register, $src$$Address);
14078 %}
14079 ins_pipe(ialu_reg_mem);
14080 %}
14081
14082 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14083 %{
14084 predicate(UseAPX);
14085 match(Set dst (XorL src1 (LoadL src2)));
14086 effect(KILL cr);
14087 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14088
14089 ins_cost(150);
14090 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14091 ins_encode %{
14092 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14093 %}
14094 ins_pipe(ialu_reg_mem);
14095 %}
14096
14097 // Xor Memory with Register
14098 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14099 %{
14100 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14101 effect(KILL cr);
14102 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14103
14104 ins_cost(150);
14105 format %{ "xorq $dst, $src\t# long" %}
14106 ins_encode %{
14107 __ xorq($dst$$Address, $src$$Register);
14108 %}
14109 ins_pipe(ialu_mem_reg);
14110 %}
14111
14112 // Xor Memory with Immediate
14113 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14114 %{
14115 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14116 effect(KILL cr);
14117 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14118
14119 ins_cost(125);
14120 format %{ "xorq $dst, $src\t# long" %}
14121 ins_encode %{
14122 __ xorq($dst$$Address, $src$$constant);
14123 %}
14124 ins_pipe(ialu_mem_imm);
14125 %}
14126
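// CmpLTMask yields -1 when p < q and 0 otherwise: setcc materializes the
// 'less' condition as 0/1 and the negate turns 1 into all ones.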
14127 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14128 %{
14129 match(Set dst (CmpLTMask p q));
14130 effect(KILL cr);
14131
14132 ins_cost(400);
14133 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14134 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14135 "negl $dst" %}
14136 ins_encode %{
14137 __ cmpl($p$$Register, $q$$Register);
14138 __ setcc(Assembler::less, $dst$$Register);
14139 __ negl($dst$$Register);
14140 %}
14141 ins_pipe(pipe_slow);
14142 %}
14143
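// With q == 0 the mask is just the sign bit of dst replicated, so an
// arithmetic right shift by 31 computes it directly.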
14144 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14145 %{
14146 match(Set dst (CmpLTMask dst zero));
14147 effect(KILL cr);
14148
14149 ins_cost(100);
14150 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14151 ins_encode %{
14152 __ sarl($dst$$Register, 31);
14153 %}
14154 ins_pipe(ialu_reg);
14155 %}
14156
14157 /* Better to save a register than avoid a branch */
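// Matches p = (p - q) + ((p < q) ? y : 0): the mask-and-add collapses
// into a conditional add guarded by the flags of the subtraction.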
14158 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14159 %{
14160 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14161 effect(KILL cr);
14162 ins_cost(300);
14163 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14164 "jge done\n\t"
14165 "addl $p,$y\n"
14166 "done: " %}
14167 ins_encode %{
14168 Register Rp = $p$$Register;
14169 Register Rq = $q$$Register;
14170 Register Ry = $y$$Register;
14171 Label done;
14172 __ subl(Rp, Rq);
14173 __ jccb(Assembler::greaterEqual, done);
14174 __ addl(Rp, Ry);
14175 __ bind(done);
14176 %}
14177 ins_pipe(pipe_cmplt);
14178 %}
14179
14180 /* Better to save a register than avoid a branch */
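// Matches y = (p < q) ? y : 0; instead of building the mask, y is
// cleared on the opposite condition.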
14181 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14182 %{
14183 match(Set y (AndI (CmpLTMask p q) y));
14184 effect(KILL cr);
14185
14186 ins_cost(300);
14187
14188 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14189 "jlt done\n\t"
14190 "xorl $y, $y\n"
14191 "done: " %}
14192 ins_encode %{
14193 Register Rp = $p$$Register;
14194 Register Rq = $q$$Register;
14195 Register Ry = $y$$Register;
14196 Label done;
14197 __ cmpl(Rp, Rq);
14198 __ jccb(Assembler::less, done);
14199 __ xorl(Ry, Ry);
14200 __ bind(done);
14201 %}
14202 ins_pipe(pipe_cmplt);
14203 %}
14204
14205
14206 //---------- FP Instructions------------------------------------------------
14207
14208 // Really expensive, avoid
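// ucomiss sets ZF, PF and CF for an unordered (NaN) operand; the
// pushfq/andq/popfq fixup clears ZF and PF so that the unordered case
// reads as strictly 'below'.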
14209 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14210 %{
14211 match(Set cr (CmpF src1 src2));
14212
14213 ins_cost(500);
14214 format %{ "ucomiss $src1, $src2\n\t"
14215 "jnp,s exit\n\t"
14216 "pushfq\t# saw NaN, set CF\n\t"
14217 "andq [rsp], #0xffffff2b\n\t"
14218 "popfq\n"
14219 "exit:" %}
14220 ins_encode %{
14221 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14222 emit_cmpfp_fixup(masm);
14223 %}
14224 ins_pipe(pipe_slow);
14225 %}
14226
14227 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14228 match(Set cr (CmpF src1 src2));
14229
14230 ins_cost(100);
14231 format %{ "ucomiss $src1, $src2" %}
14232 ins_encode %{
14233 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14234 %}
14235 ins_pipe(pipe_slow);
14236 %}
14237
14238 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14239 match(Set cr (CmpF src1 (LoadF src2)));
14240
14241 ins_cost(100);
14242 format %{ "ucomiss $src1, $src2" %}
14243 ins_encode %{
14244 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14245 %}
14246 ins_pipe(pipe_slow);
14247 %}
14248
14249 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14250 match(Set cr (CmpF src con));
14251 ins_cost(100);
14252 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14253 ins_encode %{
14254 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14255 %}
14256 ins_pipe(pipe_slow);
14257 %}
14258
14259 // Really expensive, avoid
14260 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14261 %{
14262 match(Set cr (CmpD src1 src2));
14263
14264 ins_cost(500);
14265 format %{ "ucomisd $src1, $src2\n\t"
14266 "jnp,s exit\n\t"
14267 "pushfq\t# saw NaN, set CF\n\t"
14268 "andq [rsp], #0xffffff2b\n\t"
14269 "popfq\n"
14270 "exit:" %}
14271 ins_encode %{
14272 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14273 emit_cmpfp_fixup(masm);
14274 %}
14275 ins_pipe(pipe_slow);
14276 %}
14277
14278 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14279 match(Set cr (CmpD src1 src2));
14280
14281 ins_cost(100);
14282 format %{ "ucomisd $src1, $src2 test" %}
14283 ins_encode %{
14284 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14285 %}
14286 ins_pipe(pipe_slow);
14287 %}
14288
14289 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14290 match(Set cr (CmpD src1 (LoadD src2)));
14291
14292 ins_cost(100);
14293 format %{ "ucomisd $src1, $src2" %}
14294 ins_encode %{
14295 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14296 %}
14297 ins_pipe(pipe_slow);
14298 %}
14299
14300 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14301 match(Set cr (CmpD src con));
14302 ins_cost(100);
14303 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14304 ins_encode %{
14305 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14306 %}
14307 ins_pipe(pipe_slow);
14308 %}
14309
14310 // Compare into -1,0,1
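// The -1/0/+1 materialization preloads $dst with -1, keeps it on
// unordered (jp) or 'below' (jb), and otherwise derives 0 or 1 via
// setne/movzbl.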
14311 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14312 %{
14313 match(Set dst (CmpF3 src1 src2));
14314 effect(KILL cr);
14315
14316 ins_cost(275);
14317 format %{ "ucomiss $src1, $src2\n\t"
14318 "movl $dst, #-1\n\t"
14319 "jp,s done\n\t"
14320 "jb,s done\n\t"
14321 "setne $dst\n\t"
14322 "movzbl $dst, $dst\n"
14323 "done:" %}
14324 ins_encode %{
14325 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14326 emit_cmpfp3(masm, $dst$$Register);
14327 %}
14328 ins_pipe(pipe_slow);
14329 %}
14330
14331 // Compare into -1,0,1
14332 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14333 %{
14334 match(Set dst (CmpF3 src1 (LoadF src2)));
14335 effect(KILL cr);
14336
14337 ins_cost(275);
14338 format %{ "ucomiss $src1, $src2\n\t"
14339 "movl $dst, #-1\n\t"
14340 "jp,s done\n\t"
14341 "jb,s done\n\t"
14342 "setne $dst\n\t"
14343 "movzbl $dst, $dst\n"
14344 "done:" %}
14345 ins_encode %{
14346 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14347 emit_cmpfp3(masm, $dst$$Register);
14348 %}
14349 ins_pipe(pipe_slow);
14350 %}
14351
14352 // Compare into -1,0,1
14353 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14354 match(Set dst (CmpF3 src con));
14355 effect(KILL cr);
14356
14357 ins_cost(275);
14358 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14359 "movl $dst, #-1\n\t"
14360 "jp,s done\n\t"
14361 "jb,s done\n\t"
14362 "setne $dst\n\t"
14363 "movzbl $dst, $dst\n"
14364 "done:" %}
14365 ins_encode %{
14366 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14367 emit_cmpfp3(masm, $dst$$Register);
14368 %}
14369 ins_pipe(pipe_slow);
14370 %}
14371
14372 // Compare into -1,0,1
14373 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14374 %{
14375 match(Set dst (CmpD3 src1 src2));
14376 effect(KILL cr);
14377
14378 ins_cost(275);
14379 format %{ "ucomisd $src1, $src2\n\t"
14380 "movl $dst, #-1\n\t"
14381 "jp,s done\n\t"
14382 "jb,s done\n\t"
14383 "setne $dst\n\t"
14384 "movzbl $dst, $dst\n"
14385 "done:" %}
14386 ins_encode %{
14387 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14388 emit_cmpfp3(masm, $dst$$Register);
14389 %}
14390 ins_pipe(pipe_slow);
14391 %}
14392
14393 // Compare into -1,0,1
14394 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14395 %{
14396 match(Set dst (CmpD3 src1 (LoadD src2)));
14397 effect(KILL cr);
14398
14399 ins_cost(275);
14400 format %{ "ucomisd $src1, $src2\n\t"
14401 "movl $dst, #-1\n\t"
14402 "jp,s done\n\t"
14403 "jb,s done\n\t"
14404 "setne $dst\n\t"
14405 "movzbl $dst, $dst\n"
14406 "done:" %}
14407 ins_encode %{
14408 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14409 emit_cmpfp3(masm, $dst$$Register);
14410 %}
14411 ins_pipe(pipe_slow);
14412 %}
14413
14414 // Compare into -1,0,1
14415 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14416 match(Set dst (CmpD3 src con));
14417 effect(KILL cr);
14418
14419 ins_cost(275);
14420 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14421 "movl $dst, #-1\n\t"
14422 "jp,s done\n\t"
14423 "jb,s done\n\t"
14424 "setne $dst\n\t"
14425 "movzbl $dst, $dst\n"
14426 "done:" %}
14427 ins_encode %{
14428 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14429 emit_cmpfp3(masm, $dst$$Register);
14430 %}
14431 ins_pipe(pipe_slow);
14432 %}
14433
14434 //----------Arithmetic Conversion Instructions---------------------------------
14435
14436 instruct convF2D_reg_reg(regD dst, regF src)
14437 %{
14438 match(Set dst (ConvF2D src));
14439
14440 format %{ "cvtss2sd $dst, $src" %}
14441 ins_encode %{
14442 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14443 %}
14444 ins_pipe(pipe_slow); // XXX
14445 %}
14446
14447 instruct convF2D_reg_mem(regD dst, memory src)
14448 %{
14449 predicate(UseAVX == 0);
14450 match(Set dst (ConvF2D (LoadF src)));
14451
14452 format %{ "cvtss2sd $dst, $src" %}
14453 ins_encode %{
14454 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14455 %}
14456 ins_pipe(pipe_slow); // XXX
14457 %}
14458
14459 instruct convD2F_reg_reg(regF dst, regD src)
14460 %{
14461 match(Set dst (ConvD2F src));
14462
14463 format %{ "cvtsd2ss $dst, $src" %}
14464 ins_encode %{
14465 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14466 %}
14467 ins_pipe(pipe_slow); // XXX
14468 %}
14469
14470 instruct convD2F_reg_mem(regF dst, memory src)
14471 %{
14472 predicate(UseAVX == 0);
14473 match(Set dst (ConvD2F (LoadD src)));
14474
14475 format %{ "cvtsd2ss $dst, $src" %}
14476 ins_encode %{
14477 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14478 %}
14479 ins_pipe(pipe_slow); // XXX
14480 %}
14481
14482 // XXX do mem variants
14483 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14484 %{
14485 predicate(!VM_Version::supports_avx10_2());
14486 match(Set dst (ConvF2I src));
14487 effect(KILL cr);
14488 format %{ "convert_f2i $dst, $src" %}
14489 ins_encode %{
14490 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14491 %}
14492 ins_pipe(pipe_slow);
14493 %}
14494
14495 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14496 %{
14497 predicate(VM_Version::supports_avx10_2());
14498 match(Set dst (ConvF2I src));
14499 format %{ "evcvttss2sisl $dst, $src" %}
14500 ins_encode %{
14501 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14502 %}
14503 ins_pipe(pipe_slow);
14504 %}
14505
14506 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14507 %{
14508 predicate(VM_Version::supports_avx10_2());
14509 match(Set dst (ConvF2I (LoadF src)));
14510 format %{ "evcvttss2sisl $dst, $src" %}
14511 ins_encode %{
14512 __ evcvttss2sisl($dst$$Register, $src$$Address);
14513 %}
14514 ins_pipe(pipe_slow);
14515 %}
14516
14517 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14518 %{
14519 predicate(!VM_Version::supports_avx10_2());
14520 match(Set dst (ConvF2L src));
14521 effect(KILL cr);
14522 format %{ "convert_f2l $dst, $src"%}
14523 ins_encode %{
14524 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14525 %}
14526 ins_pipe(pipe_slow);
14527 %}
14528
14529 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14530 %{
14531 predicate(VM_Version::supports_avx10_2());
14532 match(Set dst (ConvF2L src));
14533 format %{ "evcvttss2sisq $dst, $src" %}
14534 ins_encode %{
14535 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14536 %}
14537 ins_pipe(pipe_slow);
14538 %}
14539
14540 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14541 %{
14542 predicate(VM_Version::supports_avx10_2());
14543 match(Set dst (ConvF2L (LoadF src)));
14544 format %{ "evcvttss2sisq $dst, $src" %}
14545 ins_encode %{
14546 __ evcvttss2sisq($dst$$Register, $src$$Address);
14547 %}
14548 ins_pipe(pipe_slow);
14549 %}
14550
14551 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14552 %{
14553 predicate(!VM_Version::supports_avx10_2());
14554 match(Set dst (ConvD2I src));
14555 effect(KILL cr);
14556 format %{ "convert_d2i $dst, $src"%}
14557 ins_encode %{
14558 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14559 %}
14560 ins_pipe(pipe_slow);
14561 %}
14562
14563 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14564 %{
14565 predicate(VM_Version::supports_avx10_2());
14566 match(Set dst (ConvD2I src));
14567 format %{ "evcvttsd2sisl $dst, $src" %}
14568 ins_encode %{
14569 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14570 %}
14571 ins_pipe(pipe_slow);
14572 %}
14573
14574 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14575 %{
14576 predicate(VM_Version::supports_avx10_2());
14577 match(Set dst (ConvD2I (LoadD src)));
14578 format %{ "evcvttsd2sisl $dst, $src" %}
14579 ins_encode %{
14580 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14581 %}
14582 ins_pipe(pipe_slow);
14583 %}
14584
14585 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14586 %{
14587 predicate(!VM_Version::supports_avx10_2());
14588 match(Set dst (ConvD2L src));
14589 effect(KILL cr);
14590 format %{ "convert_d2l $dst, $src"%}
14591 ins_encode %{
14592 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14593 %}
14594 ins_pipe(pipe_slow);
14595 %}
14596
14597 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14598 %{
14599 predicate(VM_Version::supports_avx10_2());
14600 match(Set dst (ConvD2L src));
14601 format %{ "evcvttsd2sisq $dst, $src" %}
14602 ins_encode %{
14603 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14604 %}
14605 ins_pipe(pipe_slow);
14606 %}
14607
14608 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14609 %{
14610 predicate(VM_Version::supports_avx10_2());
14611 match(Set dst (ConvD2L (LoadD src)));
14612 format %{ "evcvttsd2sisq $dst, $src" %}
14613 ins_encode %{
14614 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14615 %}
14616 ins_pipe(pipe_slow);
14617 %}
14618
14619 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14620 %{
14621 match(Set dst (RoundD src));
14622 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14623 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14624 ins_encode %{
14625 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14626 %}
14627 ins_pipe(pipe_slow);
14628 %}
14629
14630 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14631 %{
14632 match(Set dst (RoundF src));
14633 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14634 format %{ "round_float $dst,$src" %}
14635 ins_encode %{
14636 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14637 %}
14638 ins_pipe(pipe_slow);
14639 %}
14640
14641 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14642 %{
14643 predicate(!UseXmmI2F);
14644 match(Set dst (ConvI2F src));
14645
14646 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14647 ins_encode %{
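    // cvtsi2ss merges into the destination and carries a false
    // dependence on its previous contents; on AVX, clear it first.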
14648 if (UseAVX > 0) {
14649 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14650 }
14651 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14652 %}
14653 ins_pipe(pipe_slow); // XXX
14654 %}
14655
14656 instruct convI2F_reg_mem(regF dst, memory src)
14657 %{
14658 predicate(UseAVX == 0);
14659 match(Set dst (ConvI2F (LoadI src)));
14660
14661 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14662 ins_encode %{
14663 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14664 %}
14665 ins_pipe(pipe_slow); // XXX
14666 %}
14667
14668 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14669 %{
14670 predicate(!UseXmmI2D);
14671 match(Set dst (ConvI2D src));
14672
14673 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14674 ins_encode %{
14675 if (UseAVX > 0) {
14676 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14677 }
14678 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14679 %}
14680 ins_pipe(pipe_slow); // XXX
14681 %}
14682
14683 instruct convI2D_reg_mem(regD dst, memory src)
14684 %{
14685 predicate(UseAVX == 0);
14686 match(Set dst (ConvI2D (LoadI src)));
14687
14688 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14689 ins_encode %{
14690 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14691 %}
14692 ins_pipe(pipe_slow); // XXX
14693 %}
14694
14695 instruct convXI2F_reg(regF dst, rRegI src)
14696 %{
14697 predicate(UseXmmI2F);
14698 match(Set dst (ConvI2F src));
14699
14700 format %{ "movdl $dst, $src\n\t"
14701 "cvtdq2psl $dst, $dst\t# i2f" %}
14702 ins_encode %{
14703 __ movdl($dst$$XMMRegister, $src$$Register);
14704 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14705 %}
14706 ins_pipe(pipe_slow); // XXX
14707 %}
14708
14709 instruct convXI2D_reg(regD dst, rRegI src)
14710 %{
14711 predicate(UseXmmI2D);
14712 match(Set dst (ConvI2D src));
14713
14714 format %{ "movdl $dst, $src\n\t"
14715 "cvtdq2pdl $dst, $dst\t# i2d" %}
14716 ins_encode %{
14717 __ movdl($dst$$XMMRegister, $src$$Register);
14718 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14719 %}
14720 ins_pipe(pipe_slow); // XXX
14721 %}
14722
14723 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14724 %{
14725 match(Set dst (ConvL2F src));
14726
14727 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14728 ins_encode %{
14729 if (UseAVX > 0) {
14730 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14731 }
14732 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14733 %}
14734 ins_pipe(pipe_slow); // XXX
14735 %}
14736
14737 instruct convL2F_reg_mem(regF dst, memory src)
14738 %{
14739 predicate(UseAVX == 0);
14740 match(Set dst (ConvL2F (LoadL src)));
14741
14742 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14743 ins_encode %{
14744 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14745 %}
14746 ins_pipe(pipe_slow); // XXX
14747 %}
14748
14749 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14750 %{
14751 match(Set dst (ConvL2D src));
14752
14753 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14754 ins_encode %{
14755 if (UseAVX > 0) {
14756 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14757 }
14758 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14759 %}
14760 ins_pipe(pipe_slow); // XXX
14761 %}
14762
14763 instruct convL2D_reg_mem(regD dst, memory src)
14764 %{
14765 predicate(UseAVX == 0);
14766 match(Set dst (ConvL2D (LoadL src)));
14767
14768 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14769 ins_encode %{
14770 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14771 %}
14772 ins_pipe(pipe_slow); // XXX
14773 %}
14774
14775 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14776 %{
14777 match(Set dst (ConvI2L src));
14778
14779 ins_cost(125);
14780 format %{ "movslq $dst, $src\t# i2l" %}
14781 ins_encode %{
14782 __ movslq($dst$$Register, $src$$Register);
14783 %}
14784 ins_pipe(ialu_reg_reg);
14785 %}
14786
14787 // Zero-extend convert int to long
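// Writing a 32-bit register implicitly zeroes bits 63:32, so a plain
// movl suffices; the move is elided entirely when src == dst.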
14788 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14789 %{
14790 match(Set dst (AndL (ConvI2L src) mask));
14791
14792 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14793 ins_encode %{
14794 if ($dst$$reg != $src$$reg) {
14795 __ movl($dst$$Register, $src$$Register);
14796 }
14797 %}
14798 ins_pipe(ialu_reg_reg);
14799 %}
14800
14801 // Zero-extend convert int to long
14802 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14803 %{
14804 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14805
14806 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14807 ins_encode %{
14808 __ movl($dst$$Register, $src$$Address);
14809 %}
14810 ins_pipe(ialu_reg_mem);
14811 %}
14812
14813 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14814 %{
14815 match(Set dst (AndL src mask));
14816
14817 format %{ "movl $dst, $src\t# zero-extend long" %}
14818 ins_encode %{
14819 __ movl($dst$$Register, $src$$Register);
14820 %}
14821 ins_pipe(ialu_reg_reg);
14822 %}
14823
14824 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14825 %{
14826 match(Set dst (ConvL2I src));
14827
14828 format %{ "movl $dst, $src\t# l2i" %}
14829 ins_encode %{
14830 __ movl($dst$$Register, $src$$Register);
14831 %}
14832 ins_pipe(ialu_reg_reg);
14833 %}
14834
14835
14836 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14837 match(Set dst (MoveF2I src));
14838 effect(DEF dst, USE src);
14839
14840 ins_cost(125);
14841 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14842 ins_encode %{
14843 __ movl($dst$$Register, Address(rsp, $src$$disp));
14844 %}
14845 ins_pipe(ialu_reg_mem);
14846 %}
14847
14848 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14849 match(Set dst (MoveI2F src));
14850 effect(DEF dst, USE src);
14851
14852 ins_cost(125);
14853 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14854 ins_encode %{
14855 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14856 %}
14857 ins_pipe(pipe_slow);
14858 %}
14859
14860 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14861 match(Set dst (MoveD2L src));
14862 effect(DEF dst, USE src);
14863
14864 ins_cost(125);
14865 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14866 ins_encode %{
14867 __ movq($dst$$Register, Address(rsp, $src$$disp));
14868 %}
14869 ins_pipe(ialu_reg_mem);
14870 %}
14871
14872 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14873 predicate(!UseXmmLoadAndClearUpper);
14874 match(Set dst (MoveL2D src));
14875 effect(DEF dst, USE src);
14876
14877 ins_cost(125);
14878 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14879 ins_encode %{
14880 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14881 %}
14882 ins_pipe(pipe_slow);
14883 %}
14884
14885 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14886 predicate(UseXmmLoadAndClearUpper);
14887 match(Set dst (MoveL2D src));
14888 effect(DEF dst, USE src);
14889
14890 ins_cost(125);
14891 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14892 ins_encode %{
14893 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14894 %}
14895 ins_pipe(pipe_slow);
14896 %}
14897
14898
14899 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14900 match(Set dst (MoveF2I src));
14901 effect(DEF dst, USE src);
14902
14903 ins_cost(95); // XXX
14904 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14905 ins_encode %{
14906 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14907 %}
14908 ins_pipe(pipe_slow);
14909 %}
14910
14911 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14912 match(Set dst (MoveI2F src));
14913 effect(DEF dst, USE src);
14914
14915 ins_cost(100);
14916 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
14917 ins_encode %{
14918 __ movl(Address(rsp, $dst$$disp), $src$$Register);
14919 %}
14920 ins_pipe( ialu_mem_reg );
14921 %}
14922
14923 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14924 match(Set dst (MoveD2L src));
14925 effect(DEF dst, USE src);
14926
14927 ins_cost(95); // XXX
14928 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
14929 ins_encode %{
14930 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14931 %}
14932 ins_pipe(pipe_slow);
14933 %}
14934
14935 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14936 match(Set dst (MoveL2D src));
14937 effect(DEF dst, USE src);
14938
14939 ins_cost(100);
14940 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
14941 ins_encode %{
14942 __ movq(Address(rsp, $dst$$disp), $src$$Register);
14943 %}
14944 ins_pipe(ialu_mem_reg);
14945 %}
14946
14947 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14948 match(Set dst (MoveF2I src));
14949 effect(DEF dst, USE src);
14950 ins_cost(85);
14951 format %{ "movd $dst,$src\t# MoveF2I" %}
14952 ins_encode %{
14953 __ movdl($dst$$Register, $src$$XMMRegister);
14954 %}
14955 ins_pipe( pipe_slow );
14956 %}
14957
14958 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14959 match(Set dst (MoveD2L src));
14960 effect(DEF dst, USE src);
14961 ins_cost(85);
14962 format %{ "movd $dst,$src\t# MoveD2L" %}
14963 ins_encode %{
14964 __ movdq($dst$$Register, $src$$XMMRegister);
14965 %}
14966 ins_pipe( pipe_slow );
14967 %}
14968
14969 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14970 match(Set dst (MoveI2F src));
14971 effect(DEF dst, USE src);
14972 ins_cost(100);
14973 format %{ "movd $dst,$src\t# MoveI2F" %}
14974 ins_encode %{
14975 __ movdl($dst$$XMMRegister, $src$$Register);
14976 %}
14977 ins_pipe( pipe_slow );
14978 %}
14979
14980 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14981 match(Set dst (MoveL2D src));
14982 effect(DEF dst, USE src);
14983 ins_cost(100);
14984 format %{ "movd $dst,$src\t# MoveL2D" %}
14985 ins_encode %{
14986 __ movdq($dst$$XMMRegister, $src$$Register);
14987 %}
14988 ins_pipe( pipe_slow );
14989 %}
14990
14991 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
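// Lengths up to InitArrayShortSize are zeroed with a simple
// 8-bytes-per-iteration store loop; larger lengths fall through to
// rep stosb/stosq or a 64-byte XMM loop, as laid out in the template.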
14993 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14994 Universe dummy, rFlagsReg cr)
14995 %{
14996 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14997 match(Set dummy (ClearArray cnt base));
14998 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14999
15000 format %{ $$template
15001 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15002 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15003 $$emit$$"jg LARGE\n\t"
15004 $$emit$$"dec rcx\n\t"
15005 $$emit$$"js DONE\t# Zero length\n\t"
15006 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15007 $$emit$$"dec rcx\n\t"
15008 $$emit$$"jge LOOP\n\t"
15009 $$emit$$"jmp DONE\n\t"
15010 $$emit$$"# LARGE:\n\t"
15011 if (UseFastStosb) {
15012 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15013 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15014 } else if (UseXMMForObjInit) {
15015 $$emit$$"mov rdi,rax\n\t"
15016 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15017 $$emit$$"jmpq L_zero_64_bytes\n\t"
15018 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15019 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15020 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15021 $$emit$$"add 0x40,rax\n\t"
15022 $$emit$$"# L_zero_64_bytes:\n\t"
15023 $$emit$$"sub 0x8,rcx\n\t"
15024 $$emit$$"jge L_loop\n\t"
15025 $$emit$$"add 0x4,rcx\n\t"
15026 $$emit$$"jl L_tail\n\t"
15027 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15028 $$emit$$"add 0x20,rax\n\t"
15029 $$emit$$"sub 0x4,rcx\n\t"
15030 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15031 $$emit$$"add 0x4,rcx\n\t"
15032 $$emit$$"jle L_end\n\t"
15033 $$emit$$"dec rcx\n\t"
15034 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15035 $$emit$$"vmovq xmm0,(rax)\n\t"
15036 $$emit$$"add 0x8,rax\n\t"
15037 $$emit$$"dec rcx\n\t"
15038 $$emit$$"jge L_sloop\n\t"
15039 $$emit$$"# L_end:\n\t"
15040 } else {
15041 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15042 }
15043 $$emit$$"# DONE"
15044 %}
15045 ins_encode %{
15046 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15047 $tmp$$XMMRegister, false, knoreg);
15048 %}
15049 ins_pipe(pipe_slow);
15050 %}
15051
15052 // Small non-constant length ClearArray for AVX512 targets.
15053 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15054 Universe dummy, rFlagsReg cr)
15055 %{
15056 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15057 match(Set dummy (ClearArray cnt base));
15058 ins_cost(125);
15059 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15060
15061 format %{ $$template
15062 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15063 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15064 $$emit$$"jg LARGE\n\t"
15065 $$emit$$"dec rcx\n\t"
15066 $$emit$$"js DONE\t# Zero length\n\t"
15067 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15068 $$emit$$"dec rcx\n\t"
15069 $$emit$$"jge LOOP\n\t"
15070 $$emit$$"jmp DONE\n\t"
15071 $$emit$$"# LARGE:\n\t"
15072 if (UseFastStosb) {
15073 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15074 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15075 } else if (UseXMMForObjInit) {
15076 $$emit$$"mov rdi,rax\n\t"
15077 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15078 $$emit$$"jmpq L_zero_64_bytes\n\t"
15079 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15080 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15081 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15082 $$emit$$"add 0x40,rax\n\t"
15083 $$emit$$"# L_zero_64_bytes:\n\t"
15084 $$emit$$"sub 0x8,rcx\n\t"
15085 $$emit$$"jge L_loop\n\t"
15086 $$emit$$"add 0x4,rcx\n\t"
15087 $$emit$$"jl L_tail\n\t"
15088 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15089 $$emit$$"add 0x20,rax\n\t"
15090 $$emit$$"sub 0x4,rcx\n\t"
15091 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15092 $$emit$$"add 0x4,rcx\n\t"
15093 $$emit$$"jle L_end\n\t"
15094 $$emit$$"dec rcx\n\t"
15095 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15096 $$emit$$"vmovq xmm0,(rax)\n\t"
15097 $$emit$$"add 0x8,rax\n\t"
15098 $$emit$$"dec rcx\n\t"
15099 $$emit$$"jge L_sloop\n\t"
15100 $$emit$$"# L_end:\n\t"
15101 } else {
15102 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15103 }
15104 $$emit$$"# DONE"
15105 %}
15106 ins_encode %{
15107 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15108 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15109 %}
15110 ins_pipe(pipe_slow);
15111 %}
15112
15113 // Large non-constant length ClearArray for non-AVX512 targets.
15114 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15115 Universe dummy, rFlagsReg cr)
15116 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15118 match(Set dummy (ClearArray cnt base));
15119 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15120
15121 format %{ $$template
15122 if (UseFastStosb) {
15123 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15124 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15125 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15126 } else if (UseXMMForObjInit) {
15127 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15128 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15129 $$emit$$"jmpq L_zero_64_bytes\n\t"
15130 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15131 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15132 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15133 $$emit$$"add 0x40,rax\n\t"
15134 $$emit$$"# L_zero_64_bytes:\n\t"
15135 $$emit$$"sub 0x8,rcx\n\t"
15136 $$emit$$"jge L_loop\n\t"
15137 $$emit$$"add 0x4,rcx\n\t"
15138 $$emit$$"jl L_tail\n\t"
15139 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15140 $$emit$$"add 0x20,rax\n\t"
15141 $$emit$$"sub 0x4,rcx\n\t"
15142 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15143 $$emit$$"add 0x4,rcx\n\t"
15144 $$emit$$"jle L_end\n\t"
15145 $$emit$$"dec rcx\n\t"
15146 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15147 $$emit$$"vmovq xmm0,(rax)\n\t"
15148 $$emit$$"add 0x8,rax\n\t"
15149 $$emit$$"dec rcx\n\t"
15150 $$emit$$"jge L_sloop\n\t"
15151 $$emit$$"# L_end:\n\t"
15152 } else {
15153 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15154 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15155 }
15156 %}
15157 ins_encode %{
15158 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15159 $tmp$$XMMRegister, true, knoreg);
15160 %}
15161 ins_pipe(pipe_slow);
15162 %}
15163
15164 // Large non-constant length ClearArray for AVX512 targets.
15165 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15166 Universe dummy, rFlagsReg cr)
15167 %{
15168 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15169 match(Set dummy (ClearArray cnt base));
15170 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15171
15172 format %{ $$template
15173 if (UseFastStosb) {
15174 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15175 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15176 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15177 } else if (UseXMMForObjInit) {
15178 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15179 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15180 $$emit$$"jmpq L_zero_64_bytes\n\t"
15181 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15182 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15184 $$emit$$"add 0x40,rax\n\t"
15185 $$emit$$"# L_zero_64_bytes:\n\t"
15186 $$emit$$"sub 0x8,rcx\n\t"
15187 $$emit$$"jge L_loop\n\t"
15188 $$emit$$"add 0x4,rcx\n\t"
15189 $$emit$$"jl L_tail\n\t"
15190 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15191 $$emit$$"add 0x20,rax\n\t"
15192 $$emit$$"sub 0x4,rcx\n\t"
15193 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15194 $$emit$$"add 0x4,rcx\n\t"
15195 $$emit$$"jle L_end\n\t"
15196 $$emit$$"dec rcx\n\t"
15197 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15198 $$emit$$"vmovq xmm0,(rax)\n\t"
15199 $$emit$$"add 0x8,rax\n\t"
15200 $$emit$$"dec rcx\n\t"
15201 $$emit$$"jge L_sloop\n\t"
15202 $$emit$$"# L_end:\n\t"
15203 } else {
15204 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15205 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15206 }
15207 %}
15208 ins_encode %{
15209 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15210 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15211 %}
15212 ins_pipe(pipe_slow);
15213 %}
15214
15215 // Small constant length ClearArray for AVX512 targets.
15216 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15217 %{
15218 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15219 match(Set dummy (ClearArray cnt base));
15220 ins_cost(100);
15221 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15222 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15223 ins_encode %{
15224 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15225 %}
15226 ins_pipe(pipe_slow);
15227 %}
15228
15229 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15230 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15231 %{
15232 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15233 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15234 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15235
15236 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15237 ins_encode %{
15238 __ string_compare($str1$$Register, $str2$$Register,
15239 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15240 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15241 %}
15242 ins_pipe( pipe_slow );
15243 %}
15244
15245 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15246 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15247 %{
15248 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15249 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15250 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15251
15252 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15253 ins_encode %{
15254 __ string_compare($str1$$Register, $str2$$Register,
15255 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15256 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15257 %}
15258 ins_pipe( pipe_slow );
15259 %}
15260
15261 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15262 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15263 %{
15264 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15265 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15266 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15267
15268 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15269 ins_encode %{
15270 __ string_compare($str1$$Register, $str2$$Register,
15271 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15272 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15273 %}
15274 ins_pipe( pipe_slow );
15275 %}
15276
15277 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15278 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15279 %{
15280 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15281 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15282 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15283
15284 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15285 ins_encode %{
15286 __ string_compare($str1$$Register, $str2$$Register,
15287 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15288 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15289 %}
15290 ins_pipe( pipe_slow );
15291 %}
15292
15293 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15294 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15295 %{
15296 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15297 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15298 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15299
15300 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15301 ins_encode %{
15302 __ string_compare($str1$$Register, $str2$$Register,
15303 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15304 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15305 %}
15306 ins_pipe( pipe_slow );
15307 %}
15308
15309 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15310 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15311 %{
15312 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15313 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15314 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15315
15316 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15317 ins_encode %{
15318 __ string_compare($str1$$Register, $str2$$Register,
15319 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15320 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15321 %}
15322 ins_pipe( pipe_slow );
15323 %}
15324
15325 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15326 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15327 %{
15328 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15329 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15330 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15331
15332 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15333 ins_encode %{
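    // Note: str2/str1 (and their counts) are passed swapped relative to
    // the rule's operand order.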
15334 __ string_compare($str2$$Register, $str1$$Register,
15335 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15336 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15337 %}
15338 ins_pipe( pipe_slow );
15339 %}
15340
15341 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15342 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15343 %{
15344 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15345 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15346 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15347
15348 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15349 ins_encode %{
15350 __ string_compare($str2$$Register, $str1$$Register,
15351 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15352 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15353 %}
15354 ins_pipe( pipe_slow );
15355 %}
15356
15357 // fast search of substring with known size.
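// A needle of at least one full 16-byte XMM register (16 bytes for LL,
// 8 chars for UU/UL) can always be loaded directly; shorter needles may
// cross a page boundary and are staged through the stack instead.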
15358 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15359 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15360 %{
15361 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15362 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15363 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15364
15365 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15366 ins_encode %{
15367 int icnt2 = (int)$int_cnt2$$constant;
15368 if (icnt2 >= 16) {
15369 // IndexOf for constant substrings with size >= 16 elements
15370 // which don't need to be loaded through stack.
15371 __ string_indexofC8($str1$$Register, $str2$$Register,
15372 $cnt1$$Register, $cnt2$$Register,
15373 icnt2, $result$$Register,
15374 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15375 } else {
15376 // Small strings are loaded through stack if they cross page boundary.
15377 __ string_indexof($str1$$Register, $str2$$Register,
15378 $cnt1$$Register, $cnt2$$Register,
15379 icnt2, $result$$Register,
15380 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15381 }
15382 %}
15383 ins_pipe( pipe_slow );
15384 %}
15385
15386 // fast search of substring with known size.
15387 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15388 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15389 %{
15390 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15391 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15392 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15393
15394 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15395 ins_encode %{
15396 int icnt2 = (int)$int_cnt2$$constant;
15397 if (icnt2 >= 8) {
15398 // IndexOf for constant substrings with size >= 8 elements
15399 // which don't need to be loaded through stack.
15400 __ string_indexofC8($str1$$Register, $str2$$Register,
15401 $cnt1$$Register, $cnt2$$Register,
15402 icnt2, $result$$Register,
15403 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15404 } else {
15405 // Small strings are loaded through stack if they cross page boundary.
15406 __ string_indexof($str1$$Register, $str2$$Register,
15407 $cnt1$$Register, $cnt2$$Register,
15408 icnt2, $result$$Register,
15409 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15410 }
15411 %}
15412 ins_pipe( pipe_slow );
15413 %}
15414
15415 // fast search of substring with known size.
15416 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15417 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15418 %{
15419 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15420 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15421 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15422
15423 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15424 ins_encode %{
15425 int icnt2 = (int)$int_cnt2$$constant;
15426 if (icnt2 >= 8) {
15427 // IndexOf for constant substrings with size >= 8 elements
15428 // which don't need to be loaded through stack.
15429 __ string_indexofC8($str1$$Register, $str2$$Register,
15430 $cnt1$$Register, $cnt2$$Register,
15431 icnt2, $result$$Register,
15432 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15433 } else {
15434 // Small strings are loaded through stack if they cross page boundary.
15435 __ string_indexof($str1$$Register, $str2$$Register,
15436 $cnt1$$Register, $cnt2$$Register,
15437 icnt2, $result$$Register,
15438 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15439 }
15440 %}
15441 ins_pipe( pipe_slow );
15442 %}
15443
15444 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15445 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15446 %{
15447 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15448 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15449 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15450
15451 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15452 ins_encode %{
15453 __ string_indexof($str1$$Register, $str2$$Register,
15454 $cnt1$$Register, $cnt2$$Register,
15455 (-1), $result$$Register,
15456 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15457 %}
15458 ins_pipe( pipe_slow );
15459 %}
15460
15461 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15462 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15463 %{
15464 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15465 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15466 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15467
15468 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15469 ins_encode %{
15470 __ string_indexof($str1$$Register, $str2$$Register,
15471 $cnt1$$Register, $cnt2$$Register,
15472 (-1), $result$$Register,
15473 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15474 %}
15475 ins_pipe( pipe_slow );
15476 %}
15477
15478 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15479 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15480 %{
15481 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15482 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15483 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15484
15485 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15486 ins_encode %{
15487 __ string_indexof($str1$$Register, $str2$$Register,
15488 $cnt1$$Register, $cnt2$$Register,
15489 (-1), $result$$Register,
15490 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15491 %}
15492 ins_pipe( pipe_slow );
15493 %}
15494
15495 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15496 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15497 %{
15498 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15499 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15500 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15501 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15502 ins_encode %{
15503 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15504 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15505 %}
15506 ins_pipe( pipe_slow );
15507 %}
15508
15509 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15510 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15511 %{
15512 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15513 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15514 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15515 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15516 ins_encode %{
15517 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15518 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15519 %}
15520 ins_pipe( pipe_slow );
15521 %}
15522
15523 // fast string equals
15524 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15525 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15526 %{
15527 predicate(!VM_Version::supports_avx512vlbw());
15528 match(Set result (StrEquals (Binary str1 str2) cnt));
15529 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15530
15531 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15532 ins_encode %{
15533 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15534 $cnt$$Register, $result$$Register, $tmp3$$Register,
15535 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15536 %}
15537 ins_pipe( pipe_slow );
15538 %}
15539
15540 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15541 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15542 %{
15543 predicate(VM_Version::supports_avx512vlbw());
15544 match(Set result (StrEquals (Binary str1 str2) cnt));
15545 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15546
15547 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15548 ins_encode %{
15549 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15550 $cnt$$Register, $result$$Register, $tmp3$$Register,
15551 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15552 %}
15553 ins_pipe( pipe_slow );
15554 %}
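
// Note the predicate pairing: the plain variant matches exactly when
// supports_avx512vlbw() is false and the _evex variant when it is true, so
// for any given CPU precisely one of the two rules applies. The _evex form
// threads an opmask register (kReg ktmp) into arrays_equals(), presumably
// for masked AVX-512 tail processing; the plain form passes knoreg instead.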
15555
15556 // fast array equals
15557 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15558 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15559 %{
15560 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15561 match(Set result (AryEq ary1 ary2));
15562 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15563
15564 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15565 ins_encode %{
15566 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15567 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15568 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15569 %}
15570 ins_pipe( pipe_slow );
15571 %}
15572
15573 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15574 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15575 %{
15576 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15577 match(Set result (AryEq ary1 ary2));
15578 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15579
15580 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15581 ins_encode %{
15582 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15583 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15584 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15585 %}
15586 ins_pipe( pipe_slow );
15587 %}
15588
15589 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15590 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15591 %{
15592 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15593 match(Set result (AryEq ary1 ary2));
15594 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15595
15596 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15597 ins_encode %{
15598 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15599 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15600 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15601 %}
15602 ins_pipe( pipe_slow );
15603 %}
15604
15605 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15606 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15607 %{
15608 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15609 match(Set result (AryEq ary1 ary2));
15610 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15611
15612 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15613 ins_encode %{
15614 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15615 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15616 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15617 %}
15618 ins_pipe( pipe_slow );
15619 %}
15620
15621 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15622 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15623 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15624 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15625 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15626 %{
15627 predicate(UseAVX >= 2);
15628 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15629 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15630 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15631 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15632 USE basic_type, KILL cr);
15633
15634 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15635 ins_encode %{
15636 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15637 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15638 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15639 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15640 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15641 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15642 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15643 %}
15644 ins_pipe( pipe_slow );
15645 %}
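
// A sketch of the algebra being vectorized (for orientation only): the
// scalar loop h = 31*h + a[i] unrolls to
//   h_n = 31^n * h_0 + 31^(n-1)*a[0] + 31^(n-2)*a[1] + ... + a[n-1]
// so independent lanes can accumulate partial sums against precomputed
// powers of 31 and be combined in one final horizontal reduction; hence
// the large bank of vector temporaries claimed above.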
15646
15647 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15649 %{
15650 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15651 match(Set result (CountPositives ary1 len));
15652 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15653
15654 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15655 ins_encode %{
15656 __ count_positives($ary1$$Register, $len$$Register,
15657 $result$$Register, $tmp3$$Register,
15658 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15659 %}
15660 ins_pipe( pipe_slow );
15661 %}
15662
15663 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15665 %{
15666 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15667 match(Set result (CountPositives ary1 len));
15668 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15669
15670 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15671 ins_encode %{
15672 __ count_positives($ary1$$Register, $len$$Register,
15673 $result$$Register, $tmp3$$Register,
15674 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15675 %}
15676 ins_pipe( pipe_slow );
15677 %}
15678
15679 // fast char[] to byte[] compression
15680 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15681 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15682 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15683 match(Set result (StrCompressedCopy src (Binary dst len)));
15684 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15685 USE_KILL len, KILL tmp5, KILL cr);
15686
15687 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15688 ins_encode %{
15689 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15690 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15691 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15692 knoreg, knoreg);
15693 %}
15694 ins_pipe( pipe_slow );
15695 %}
15696
15697 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15698 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15699 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15700 match(Set result (StrCompressedCopy src (Binary dst len)));
15701 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15702 USE_KILL len, KILL tmp5, KILL cr);
15703
15704 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15705 ins_encode %{
15706 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15707 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15708 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15709 $ktmp1$$KRegister, $ktmp2$$KRegister);
15710 %}
15711 ins_pipe( pipe_slow );
15712 %}

// fast byte[] to char[] inflation
15714 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15715 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15716 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15717 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15718 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15719
15720 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15721 ins_encode %{
15722 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15723 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15724 %}
15725 ins_pipe( pipe_slow );
15726 %}
15727
15728 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15729 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15730 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15731 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15732 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15733
15734 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15735 ins_encode %{
15736 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15737 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15738 %}
15739 ins_pipe( pipe_slow );
15740 %}
15741
15742 // encode char[] to byte[] in ISO_8859_1
15743 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15744 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15745 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15746 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15747 match(Set result (EncodeISOArray src (Binary dst len)));
15748 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15749
15750 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15751 ins_encode %{
15752 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15753 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15754 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15755 %}
15756 ins_pipe( pipe_slow );
15757 %}
15758
15759 // encode char[] to byte[] in ASCII
15760 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15761 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15762 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15763 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15764 match(Set result (EncodeISOArray src (Binary dst len)));
15765 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15766
15767 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15768 ins_encode %{
15769 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15770 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15771 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15772 %}
15773 ins_pipe( pipe_slow );
15774 %}
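
// Both rules funnel into encode_iso_array(); the trailing bool selects the
// cut-off: false accepts any char <= 0xFF (ISO-8859-1), true only chars
// <= 0x7F (ASCII). Assuming the stub's usual contract, $result reports how
// many leading characters were encoded, which lets the caller detect where
// the first out-of-range character stopped the copy.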
15775
15776 //----------Overflow Math Instructions-----------------------------------------
15777
15778 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15779 %{
15780 match(Set cr (OverflowAddI op1 op2));
15781 effect(DEF cr, USE_KILL op1, USE op2);
15782
15783 format %{ "addl $op1, $op2\t# overflow check int" %}
15784
15785 ins_encode %{
15786 __ addl($op1$$Register, $op2$$Register);
15787 %}
15788 ins_pipe(ialu_reg_reg);
15789 %}
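
// These Overflow* rules produce only a flags result; the matched addl sets
// OF and the consumer branches on it. A hedged illustration of the expected
// use, e.g. for Math.addExact(a, b):
//   addl  rax, rdx     # this rule; sets OF on signed overflow
//   jo    overflow     # uncommon trap / throw ArithmeticException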
15790
15791 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15792 %{
15793 match(Set cr (OverflowAddI op1 op2));
15794 effect(DEF cr, USE_KILL op1, USE op2);
15795
15796 format %{ "addl $op1, $op2\t# overflow check int" %}
15797
15798 ins_encode %{
15799 __ addl($op1$$Register, $op2$$constant);
15800 %}
15801 ins_pipe(ialu_reg_reg);
15802 %}
15803
15804 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15805 %{
15806 match(Set cr (OverflowAddL op1 op2));
15807 effect(DEF cr, USE_KILL op1, USE op2);
15808
15809 format %{ "addq $op1, $op2\t# overflow check long" %}
15810 ins_encode %{
15811 __ addq($op1$$Register, $op2$$Register);
15812 %}
15813 ins_pipe(ialu_reg_reg);
15814 %}
15815
15816 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15817 %{
15818 match(Set cr (OverflowAddL op1 op2));
15819 effect(DEF cr, USE_KILL op1, USE op2);
15820
15821 format %{ "addq $op1, $op2\t# overflow check long" %}
15822 ins_encode %{
15823 __ addq($op1$$Register, $op2$$constant);
15824 %}
15825 ins_pipe(ialu_reg_reg);
15826 %}
15827
15828 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15829 %{
15830 match(Set cr (OverflowSubI op1 op2));
15831
15832 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15833 ins_encode %{
15834 __ cmpl($op1$$Register, $op2$$Register);
15835 %}
15836 ins_pipe(ialu_reg_reg);
15837 %}
15838
15839 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15840 %{
15841 match(Set cr (OverflowSubI op1 op2));
15842
15843 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15844 ins_encode %{
15845 __ cmpl($op1$$Register, $op2$$constant);
15846 %}
15847 ins_pipe(ialu_reg_reg);
15848 %}
15849
15850 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15851 %{
15852 match(Set cr (OverflowSubL op1 op2));
15853
15854 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15855 ins_encode %{
15856 __ cmpq($op1$$Register, $op2$$Register);
15857 %}
15858 ins_pipe(ialu_reg_reg);
15859 %}
15860
15861 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15862 %{
15863 match(Set cr (OverflowSubL op1 op2));
15864
15865 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15866 ins_encode %{
15867 __ cmpq($op1$$Register, $op2$$constant);
15868 %}
15869 ins_pipe(ialu_reg_reg);
15870 %}
15871
15872 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15873 %{
15874 match(Set cr (OverflowSubI zero op2));
15875 effect(DEF cr, USE_KILL op2);
15876
15877 format %{ "negl $op2\t# overflow check int" %}
15878 ins_encode %{
15879 __ negl($op2$$Register);
15880 %}
15881 ins_pipe(ialu_reg_reg);
15882 %}
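
// negl overflows for exactly one input, 0x80000000 (Integer.MIN_VALUE),
// whose negation is unrepresentable in 32 bits; that is precisely the case
// in which the hardware sets OF.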
15883
15884 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15885 %{
15886 match(Set cr (OverflowSubL zero op2));
15887 effect(DEF cr, USE_KILL op2);
15888
15889 format %{ "negq $op2\t# overflow check long" %}
15890 ins_encode %{
15891 __ negq($op2$$Register);
15892 %}
15893 ins_pipe(ialu_reg_reg);
15894 %}
15895
15896 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15897 %{
15898 match(Set cr (OverflowMulI op1 op2));
15899 effect(DEF cr, USE_KILL op1, USE op2);
15900
15901 format %{ "imull $op1, $op2\t# overflow check int" %}
15902 ins_encode %{
15903 __ imull($op1$$Register, $op2$$Register);
15904 %}
15905 ins_pipe(ialu_reg_reg_alu0);
15906 %}
15907
15908 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15909 %{
15910 match(Set cr (OverflowMulI op1 op2));
15911 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15912
15913 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
15914 ins_encode %{
15915 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15916 %}
15917 ins_pipe(ialu_reg_reg_alu0);
15918 %}
15919
15920 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15921 %{
15922 match(Set cr (OverflowMulL op1 op2));
15923 effect(DEF cr, USE_KILL op1, USE op2);
15924
15925 format %{ "imulq $op1, $op2\t# overflow check long" %}
15926 ins_encode %{
15927 __ imulq($op1$$Register, $op2$$Register);
15928 %}
15929 ins_pipe(ialu_reg_reg_alu0);
15930 %}
15931
15932 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15933 %{
15934 match(Set cr (OverflowMulL op1 op2));
15935 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15936
15937 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
15938 ins_encode %{
15939 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15940 %}
15941 ins_pipe(ialu_reg_reg_alu0);
15942 %}
15943
15944
15945 //----------Control Flow Instructions------------------------------------------
15946 // Signed compare Instructions
15947
15948 // XXX more variants!!
15949 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15950 %{
15951 match(Set cr (CmpI op1 op2));
15952 effect(DEF cr, USE op1, USE op2);
15953
15954 format %{ "cmpl $op1, $op2" %}
15955 ins_encode %{
15956 __ cmpl($op1$$Register, $op2$$Register);
15957 %}
15958 ins_pipe(ialu_cr_reg_reg);
15959 %}
15960
15961 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15962 %{
15963 match(Set cr (CmpI op1 op2));
15964
15965 format %{ "cmpl $op1, $op2" %}
15966 ins_encode %{
15967 __ cmpl($op1$$Register, $op2$$constant);
15968 %}
15969 ins_pipe(ialu_cr_reg_imm);
15970 %}
15971
15972 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15973 %{
15974 match(Set cr (CmpI op1 (LoadI op2)));
15975
15976 ins_cost(500); // XXX
15977 format %{ "cmpl $op1, $op2" %}
15978 ins_encode %{
15979 __ cmpl($op1$$Register, $op2$$Address);
15980 %}
15981 ins_pipe(ialu_cr_reg_mem);
15982 %}
15983
15984 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15985 %{
15986 match(Set cr (CmpI src zero));
15987
15988 format %{ "testl $src, $src" %}
15989 ins_encode %{
15990 __ testl($src$$Register, $src$$Register);
15991 %}
15992 ins_pipe(ialu_cr_reg_imm);
15993 %}
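
// testl src,src is the idiomatic compare against zero: it sets ZF and SF
// exactly as cmpl src,0 would (both leave OF and CF clear) while needing
// no immediate bytes in the encoding.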
15994
15995 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15996 %{
15997 match(Set cr (CmpI (AndI src con) zero));
15998
15999 format %{ "testl $src, $con" %}
16000 ins_encode %{
16001 __ testl($src$$Register, $con$$constant);
16002 %}
16003 ins_pipe(ialu_cr_reg_imm);
16004 %}
16005
16006 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16007 %{
16008 match(Set cr (CmpI (AndI src1 src2) zero));
16009
16010 format %{ "testl $src1, $src2" %}
16011 ins_encode %{
16012 __ testl($src1$$Register, $src2$$Register);
16013 %}
16014 ins_pipe(ialu_cr_reg_imm);
16015 %}
16016
16017 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16018 %{
16019 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16020
16021 format %{ "testl $src, $mem" %}
16022 ins_encode %{
16023 __ testl($src$$Register, $mem$$Address);
16024 %}
16025 ins_pipe(ialu_cr_reg_mem);
16026 %}
16027
16028 // Unsigned compare Instructions; really, same as signed except they
16029 // produce an rFlagsRegU instead of rFlagsReg.
16030 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16031 %{
16032 match(Set cr (CmpU op1 op2));
16033
16034 format %{ "cmpl $op1, $op2\t# unsigned" %}
16035 ins_encode %{
16036 __ cmpl($op1$$Register, $op2$$Register);
16037 %}
16038 ins_pipe(ialu_cr_reg_reg);
16039 %}
16040
16041 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16042 %{
16043 match(Set cr (CmpU op1 op2));
16044
16045 format %{ "cmpl $op1, $op2\t# unsigned" %}
16046 ins_encode %{
16047 __ cmpl($op1$$Register, $op2$$constant);
16048 %}
16049 ins_pipe(ialu_cr_reg_imm);
16050 %}
16051
16052 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16053 %{
16054 match(Set cr (CmpU op1 (LoadI op2)));
16055
16056 ins_cost(500); // XXX
16057 format %{ "cmpl $op1, $op2\t# unsigned" %}
16058 ins_encode %{
16059 __ cmpl($op1$$Register, $op2$$Address);
16060 %}
16061 ins_pipe(ialu_cr_reg_mem);
16062 %}
16063
16064 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16065 %{
16066 match(Set cr (CmpU src zero));
16067
16068 format %{ "testl $src, $src\t# unsigned" %}
16069 ins_encode %{
16070 __ testl($src$$Register, $src$$Register);
16071 %}
16072 ins_pipe(ialu_cr_reg_imm);
16073 %}
16074
16075 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16076 %{
16077 match(Set cr (CmpP op1 op2));
16078
16079 format %{ "cmpq $op1, $op2\t# ptr" %}
16080 ins_encode %{
16081 __ cmpq($op1$$Register, $op2$$Register);
16082 %}
16083 ins_pipe(ialu_cr_reg_reg);
16084 %}
16085
16086 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16087 %{
16088 match(Set cr (CmpP op1 (LoadP op2)));
16089 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16090
16091 ins_cost(500); // XXX
16092 format %{ "cmpq $op1, $op2\t# ptr" %}
16093 ins_encode %{
16094 __ cmpq($op1$$Register, $op2$$Address);
16095 %}
16096 ins_pipe(ialu_cr_reg_mem);
16097 %}
16098
16099 // XXX this is generalized by compP_rReg_mem???
16100 // Compare raw pointer (used in out-of-heap check).
16101 // Only works because non-oop pointers must be raw pointers
16102 // and raw pointers have no anti-dependencies.
16103 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16104 %{
16105 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16106 n->in(2)->as_Load()->barrier_data() == 0);
16107 match(Set cr (CmpP op1 (LoadP op2)));
16108
16109 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16110 ins_encode %{
16111 __ cmpq($op1$$Register, $op2$$Address);
16112 %}
16113 ins_pipe(ialu_cr_reg_mem);
16114 %}
16115
// This will generate a signed flags result, which is OK: any compare
// against zero should be eq/neq.
16118 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16119 %{
16120 match(Set cr (CmpP src zero));
16121
16122 format %{ "testq $src, $src\t# ptr" %}
16123 ins_encode %{
16124 __ testq($src$$Register, $src$$Register);
16125 %}
16126 ins_pipe(ialu_cr_reg_imm);
16127 %}
16128
// This will generate a signed flags result, which is OK: any compare
// against zero should be eq/neq.
16131 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16132 %{
16133 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16134 n->in(1)->as_Load()->barrier_data() == 0);
16135 match(Set cr (CmpP (LoadP op) zero));
16136
16137 ins_cost(500); // XXX
16138 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16139 ins_encode %{
16140 __ testq($op$$Address, 0xFFFFFFFF);
16141 %}
16142 ins_pipe(ialu_cr_reg_imm);
16143 %}
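
// The 32-bit immediate 0xFFFFFFFF is sign-extended by testq to the all-ones
// 64-bit mask shown in the format string, so a single instruction tests the
// full pointer against zero without materializing a 64-bit constant in a
// scratch register.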
16144
16145 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16146 %{
16147 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16148 n->in(1)->as_Load()->barrier_data() == 0);
16149 match(Set cr (CmpP (LoadP mem) zero));
16150
16151 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16152 ins_encode %{
16153 __ cmpq(r12, $mem$$Address);
16154 %}
16155 ins_pipe(ialu_cr_reg_mem);
16156 %}
16157
16158 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16159 %{
16160 match(Set cr (CmpN op1 op2));
16161
16162 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16163 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16164 ins_pipe(ialu_cr_reg_reg);
16165 %}
16166
16167 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16168 %{
16169 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16170 match(Set cr (CmpN src (LoadN mem)));
16171
16172 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16173 ins_encode %{
16174 __ cmpl($src$$Register, $mem$$Address);
16175 %}
16176 ins_pipe(ialu_cr_reg_mem);
16177 %}
16178
16179 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16180 match(Set cr (CmpN op1 op2));
16181
16182 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16183 ins_encode %{
16184 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16185 %}
16186 ins_pipe(ialu_cr_reg_imm);
16187 %}
16188
16189 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16190 %{
16191 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16192 match(Set cr (CmpN src (LoadN mem)));
16193
16194 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16195 ins_encode %{
16196 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16197 %}
16198 ins_pipe(ialu_cr_reg_mem);
16199 %}
16200
16201 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16202 match(Set cr (CmpN op1 op2));
16203
16204 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16205 ins_encode %{
16206 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16207 %}
16208 ins_pipe(ialu_cr_reg_imm);
16209 %}
16210
16211 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16212 %{
16213 predicate(!UseCompactObjectHeaders);
16214 match(Set cr (CmpN src (LoadNKlass mem)));
16215
16216 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16217 ins_encode %{
16218 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16219 %}
16220 ins_pipe(ialu_cr_reg_mem);
16221 %}
16222
16223 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16224 match(Set cr (CmpN src zero));
16225
16226 format %{ "testl $src, $src\t# compressed ptr" %}
16227 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16228 ins_pipe(ialu_cr_reg_imm);
16229 %}
16230
16231 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16232 %{
16233 predicate(CompressedOops::base() != nullptr &&
16234 n->in(1)->as_Load()->barrier_data() == 0);
16235 match(Set cr (CmpN (LoadN mem) zero));
16236
16237 ins_cost(500); // XXX
16238 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16239 ins_encode %{
16240 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16241 %}
16242 ins_pipe(ialu_cr_reg_mem);
16243 %}
16244
16245 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16246 %{
16247 predicate(CompressedOops::base() == nullptr &&
16248 n->in(1)->as_Load()->barrier_data() == 0);
16249 match(Set cr (CmpN (LoadN mem) zero));
16250
16251 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16252 ins_encode %{
16253 __ cmpl(r12, $mem$$Address);
16254 %}
16255 ins_pipe(ialu_cr_reg_mem);
16256 %}
16257
16258 // Yanked all unsigned pointer compare operations.
16259 // Pointer compares are done with CmpP which is already unsigned.
16260
16261 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16262 %{
16263 match(Set cr (CmpL op1 op2));
16264
16265 format %{ "cmpq $op1, $op2" %}
16266 ins_encode %{
16267 __ cmpq($op1$$Register, $op2$$Register);
16268 %}
16269 ins_pipe(ialu_cr_reg_reg);
16270 %}
16271
16272 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16273 %{
16274 match(Set cr (CmpL op1 op2));
16275
16276 format %{ "cmpq $op1, $op2" %}
16277 ins_encode %{
16278 __ cmpq($op1$$Register, $op2$$constant);
16279 %}
16280 ins_pipe(ialu_cr_reg_imm);
16281 %}
16282
16283 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16284 %{
16285 match(Set cr (CmpL op1 (LoadL op2)));
16286
16287 format %{ "cmpq $op1, $op2" %}
16288 ins_encode %{
16289 __ cmpq($op1$$Register, $op2$$Address);
16290 %}
16291 ins_pipe(ialu_cr_reg_mem);
16292 %}
16293
16294 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16295 %{
16296 match(Set cr (CmpL src zero));
16297
16298 format %{ "testq $src, $src" %}
16299 ins_encode %{
16300 __ testq($src$$Register, $src$$Register);
16301 %}
16302 ins_pipe(ialu_cr_reg_imm);
16303 %}
16304
16305 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16306 %{
16307 match(Set cr (CmpL (AndL src con) zero));
16308
16309 format %{ "testq $src, $con\t# long" %}
16310 ins_encode %{
16311 __ testq($src$$Register, $con$$constant);
16312 %}
16313 ins_pipe(ialu_cr_reg_imm);
16314 %}
16315
16316 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16317 %{
16318 match(Set cr (CmpL (AndL src1 src2) zero));
16319
16320 format %{ "testq $src1, $src2\t# long" %}
16321 ins_encode %{
16322 __ testq($src1$$Register, $src2$$Register);
16323 %}
16324 ins_pipe(ialu_cr_reg_imm);
16325 %}
16326
16327 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16328 %{
16329 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16330
16331 format %{ "testq $src, $mem" %}
16332 ins_encode %{
16333 __ testq($src$$Register, $mem$$Address);
16334 %}
16335 ins_pipe(ialu_cr_reg_mem);
16336 %}
16337
16338 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16339 %{
16340 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16341
16342 format %{ "testq $src, $mem" %}
16343 ins_encode %{
16344 __ testq($src$$Register, $mem$$Address);
16345 %}
16346 ins_pipe(ialu_cr_reg_mem);
16347 %}
16348
16349 // Manifest a CmpU result in an integer register. Very painful.
16350 // This is the test to avoid.
16351 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16352 %{
16353 match(Set dst (CmpU3 src1 src2));
16354 effect(KILL flags);
16355
16356 ins_cost(275); // XXX
16357 format %{ "cmpl $src1, $src2\t# CmpL3\n\t"
16358 "movl $dst, -1\n\t"
16359 "jb,u done\n\t"
16360 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16361 "done:" %}
16362 ins_encode %{
16363 Label done;
16364 __ cmpl($src1$$Register, $src2$$Register);
16365 __ movl($dst$$Register, -1);
16366 __ jccb(Assembler::below, done);
16367 __ setcc(Assembler::notZero, $dst$$Register);
16368 __ bind(done);
16369 %}
16370 ins_pipe(pipe_slow);
16371 %}
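
// How the three-way result falls out of the sequence above: $dst starts at
// -1; if the unsigned compare was "below", that value survives; otherwise
// setne leaves 0 when the operands were equal and 1 when src1 was above
// src2. CmpL3 and CmpUL3 below reuse the same shape, changing only the
// operand width and the branch condition.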
16372
16373 // Manifest a CmpL result in an integer register. Very painful.
16374 // This is the test to avoid.
16375 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16376 %{
16377 match(Set dst (CmpL3 src1 src2));
16378 effect(KILL flags);
16379
16380 ins_cost(275); // XXX
16381 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16382 "movl $dst, -1\n\t"
16383 "jl,s done\n\t"
16384 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16385 "done:" %}
16386 ins_encode %{
16387 Label done;
16388 __ cmpq($src1$$Register, $src2$$Register);
16389 __ movl($dst$$Register, -1);
16390 __ jccb(Assembler::less, done);
16391 __ setcc(Assembler::notZero, $dst$$Register);
16392 __ bind(done);
16393 %}
16394 ins_pipe(pipe_slow);
16395 %}
16396
16397 // Manifest a CmpUL result in an integer register. Very painful.
16398 // This is the test to avoid.
16399 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16400 %{
16401 match(Set dst (CmpUL3 src1 src2));
16402 effect(KILL flags);
16403
16404 ins_cost(275); // XXX
16405 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16406 "movl $dst, -1\n\t"
16407 "jb,u done\n\t"
16408 "setcc $dst \t# emits setne + movzbl or setzune for APX"
16409 "done:" %}
16410 ins_encode %{
16411 Label done;
16412 __ cmpq($src1$$Register, $src2$$Register);
16413 __ movl($dst$$Register, -1);
16414 __ jccb(Assembler::below, done);
16415 __ setcc(Assembler::notZero, $dst$$Register);
16416 __ bind(done);
16417 %}
16418 ins_pipe(pipe_slow);
16419 %}
16420
16421 // Unsigned long compare Instructions; really, same as signed long except they
16422 // produce an rFlagsRegU instead of rFlagsReg.
16423 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16424 %{
16425 match(Set cr (CmpUL op1 op2));
16426
16427 format %{ "cmpq $op1, $op2\t# unsigned" %}
16428 ins_encode %{
16429 __ cmpq($op1$$Register, $op2$$Register);
16430 %}
16431 ins_pipe(ialu_cr_reg_reg);
16432 %}
16433
16434 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16435 %{
16436 match(Set cr (CmpUL op1 op2));
16437
16438 format %{ "cmpq $op1, $op2\t# unsigned" %}
16439 ins_encode %{
16440 __ cmpq($op1$$Register, $op2$$constant);
16441 %}
16442 ins_pipe(ialu_cr_reg_imm);
16443 %}
16444
16445 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16446 %{
16447 match(Set cr (CmpUL op1 (LoadL op2)));
16448
16449 format %{ "cmpq $op1, $op2\t# unsigned" %}
16450 ins_encode %{
16451 __ cmpq($op1$$Register, $op2$$Address);
16452 %}
16453 ins_pipe(ialu_cr_reg_mem);
16454 %}
16455
16456 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16457 %{
16458 match(Set cr (CmpUL src zero));
16459
16460 format %{ "testq $src, $src\t# unsigned" %}
16461 ins_encode %{
16462 __ testq($src$$Register, $src$$Register);
16463 %}
16464 ins_pipe(ialu_cr_reg_imm);
16465 %}
16466
16467 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16468 %{
16469 match(Set cr (CmpI (LoadB mem) imm));
16470
16471 ins_cost(125);
16472 format %{ "cmpb $mem, $imm" %}
16473 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16474 ins_pipe(ialu_cr_reg_mem);
16475 %}
16476
16477 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16478 %{
16479 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16480
16481 ins_cost(125);
16482 format %{ "testb $mem, $imm\t# ubyte" %}
16483 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16484 ins_pipe(ialu_cr_reg_mem);
16485 %}
16486
16487 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16488 %{
16489 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16490
16491 ins_cost(125);
16492 format %{ "testb $mem, $imm\t# byte" %}
16493 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16494 ins_pipe(ialu_cr_reg_mem);
16495 %}
16496
16497 //----------Max and Min--------------------------------------------------------
16498 // Min Instructions
16499
16500 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16501 %{
16502 predicate(!UseAPX);
16503 effect(USE_DEF dst, USE src, USE cr);
16504
16505 format %{ "cmovlgt $dst, $src\t# min" %}
16506 ins_encode %{
16507 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16508 %}
16509 ins_pipe(pipe_cmov_reg);
16510 %}
16511
16512 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16513 %{
16514 predicate(UseAPX);
16515 effect(DEF dst, USE src1, USE src2, USE cr);
16516
16517 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16518 ins_encode %{
16519 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16520 %}
16521 ins_pipe(pipe_cmov_reg);
16522 %}
16523
16524 instruct minI_rReg(rRegI dst, rRegI src)
16525 %{
16526 predicate(!UseAPX);
16527 match(Set dst (MinI dst src));
16528
16529 ins_cost(200);
16530 expand %{
16531 rFlagsReg cr;
16532 compI_rReg(cr, dst, src);
16533 cmovI_reg_g(dst, src, cr);
16534 %}
16535 %}
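
// The expand above lowers MinI to a branchless two-instruction sequence:
//   cmpl    dst, src
//   cmovlgt dst, src    # dst = (dst > src) ? src : dst == min(dst, src)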
16536
16537 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16538 %{
16539 predicate(UseAPX);
16540 match(Set dst (MinI src1 src2));
16541 effect(DEF dst, USE src1, USE src2);
16542
16543 ins_cost(200);
16544 expand %{
16545 rFlagsReg cr;
16546 compI_rReg(cr, src1, src2);
16547 cmovI_reg_g_ndd(dst, src1, src2, cr);
16548 %}
16549 %}
16550
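// Max Instructions
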
16551 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16552 %{
16553 predicate(!UseAPX);
16554 effect(USE_DEF dst, USE src, USE cr);
16555
16556 format %{ "cmovllt $dst, $src\t# max" %}
16557 ins_encode %{
16558 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16559 %}
16560 ins_pipe(pipe_cmov_reg);
16561 %}
16562
16563 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16564 %{
16565 predicate(UseAPX);
16566 effect(DEF dst, USE src1, USE src2, USE cr);
16567
16568 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16569 ins_encode %{
16570 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16571 %}
16572 ins_pipe(pipe_cmov_reg);
16573 %}
16574
16575 instruct maxI_rReg(rRegI dst, rRegI src)
16576 %{
16577 predicate(!UseAPX);
16578 match(Set dst (MaxI dst src));
16579
16580 ins_cost(200);
16581 expand %{
16582 rFlagsReg cr;
16583 compI_rReg(cr, dst, src);
16584 cmovI_reg_l(dst, src, cr);
16585 %}
16586 %}
16587
16588 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16589 %{
16590 predicate(UseAPX);
16591 match(Set dst (MaxI src1 src2));
16592 effect(DEF dst, USE src1, USE src2);
16593
16594 ins_cost(200);
16595 expand %{
16596 rFlagsReg cr;
16597 compI_rReg(cr, src1, src2);
16598 cmovI_reg_l_ndd(dst, src1, src2, cr);
16599 %}
16600 %}
16601
16602 // ============================================================================
16603 // Branch Instructions
16604
16605 // Jump Direct - Label defines a relative address from JMP+1
16606 instruct jmpDir(label labl)
16607 %{
16608 match(Goto);
16609 effect(USE labl);
16610
16611 ins_cost(300);
16612 format %{ "jmp $labl" %}
16613 size(5);
16614 ins_encode %{
16615 Label* L = $labl$$label;
16616 __ jmp(*L, false); // Always long jump
16617 %}
16618 ins_pipe(pipe_jmp);
16619 %}
16620
16621 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16622 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16623 %{
16624 match(If cop cr);
16625 effect(USE labl);
16626
16627 ins_cost(300);
16628 format %{ "j$cop $labl" %}
16629 size(6);
16630 ins_encode %{
16631 Label* L = $labl$$label;
16632 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16633 %}
16634 ins_pipe(pipe_jcc);
16635 %}
16636
16637 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16638 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16639 %{
16640 match(CountedLoopEnd cop cr);
16641 effect(USE labl);
16642
16643 ins_cost(300);
16644 format %{ "j$cop $labl\t# loop end" %}
16645 size(6);
16646 ins_encode %{
16647 Label* L = $labl$$label;
16648 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16649 %}
16650 ins_pipe(pipe_jcc);
16651 %}
16652
16653 // Jump Direct Conditional - using unsigned comparison
16654 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16655 match(If cop cmp);
16656 effect(USE labl);
16657
16658 ins_cost(300);
16659 format %{ "j$cop,u $labl" %}
16660 size(6);
16661 ins_encode %{
16662 Label* L = $labl$$label;
16663 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16664 %}
16665 ins_pipe(pipe_jcc);
16666 %}
16667
16668 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16669 match(If cop cmp);
16670 effect(USE labl);
16671
16672 ins_cost(200);
16673 format %{ "j$cop,u $labl" %}
16674 size(6);
16675 ins_encode %{
16676 Label* L = $labl$$label;
16677 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16678 %}
16679 ins_pipe(pipe_jcc);
16680 %}
16681
16682 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16683 match(If cop cmp);
16684 effect(USE labl);
16685
16686 ins_cost(200);
16687 format %{ $$template
16688 if ($cop$$cmpcode == Assembler::notEqual) {
16689 $$emit$$"jp,u $labl\n\t"
16690 $$emit$$"j$cop,u $labl"
16691 } else {
16692 $$emit$$"jp,u done\n\t"
16693 $$emit$$"j$cop,u $labl\n\t"
16694 $$emit$$"done:"
16695 }
16696 %}
16697 ins_encode %{
16698 Label* l = $labl$$label;
16699 if ($cop$$cmpcode == Assembler::notEqual) {
16700 __ jcc(Assembler::parity, *l, false);
16701 __ jcc(Assembler::notEqual, *l, false);
16702 } else if ($cop$$cmpcode == Assembler::equal) {
16703 Label done;
16704 __ jccb(Assembler::parity, done);
16705 __ jcc(Assembler::equal, *l, false);
16706 __ bind(done);
16707 } else {
16708 ShouldNotReachHere();
16709 }
16710 %}
16711 ins_pipe(pipe_jcc);
16712 %}
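
// The extra parity jump handles unordered floating-point compares: ucomiss
// and ucomisd report a NaN operand by setting PF. For != the unordered case
// must count as "not equal", so jp also jumps to the target; for == it must
// not, so jp skips over the je. The ShouldNotReachHere() documents that only
// eq/ne consumers produce this flags type.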
16713
16714 // ============================================================================
// The second (slow) half of a subtype check. Scan the subklass's secondary
// superklass array for an instance of the superklass. Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
16720
16721 instruct partialSubtypeCheck(rdi_RegP result,
16722 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16723 rFlagsReg cr)
16724 %{
16725 match(Set result (PartialSubtypeCheck sub super));
16726 predicate(!UseSecondarySupersTable);
16727 effect(KILL rcx, KILL cr);
16728
16729 ins_cost(1100); // slightly larger than the next version
16730 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16731 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16732 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16733 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16734 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16735 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16736 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16737 "miss:\t" %}
16738
16739 ins_encode %{
16740 Label miss;
16741 // NB: Callers may assume that, when $result is a valid register,
16742 // check_klass_subtype_slow_path_linear sets it to a nonzero
16743 // value.
16744 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16745 $rcx$$Register, $result$$Register,
16746 nullptr, &miss,
16747 /*set_cond_codes:*/ true);
16748 __ xorptr($result$$Register, $result$$Register);
16749 __ bind(miss);
16750 %}
16751
16752 ins_pipe(pipe_slow);
16753 %}
16754
16755 // ============================================================================
16756 // Two versions of hashtable-based partialSubtypeCheck, both used when
16757 // we need to search for a super class in the secondary supers array.
16758 // The first is used when we don't know _a priori_ the class being
16759 // searched for. The second, far more common, is used when we do know:
16760 // this is used for instanceof, checkcast, and any case where C2 can
16761 // determine it by constant propagation.
16762
16763 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16764 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16765 rFlagsReg cr)
16766 %{
16767 match(Set result (PartialSubtypeCheck sub super));
16768 predicate(UseSecondarySupersTable);
16769 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16770
16771 ins_cost(1000);
16772 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16773
16774 ins_encode %{
16775 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16776 $temp3$$Register, $temp4$$Register, $result$$Register);
16777 %}
16778
16779 ins_pipe(pipe_slow);
16780 %}
16781
16782 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16783 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16784 rFlagsReg cr)
16785 %{
16786 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16787 predicate(UseSecondarySupersTable);
16788 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16789
  ins_cost(700); // smaller than the versions above
16791 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16792
16793 ins_encode %{
16794 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16795 if (InlineSecondarySupersTest) {
16796 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16797 $temp3$$Register, $temp4$$Register, $result$$Register,
16798 super_klass_slot);
16799 } else {
16800 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16801 }
16802 %}
16803
16804 ins_pipe(pipe_slow);
16805 %}
16806
16807 // ============================================================================
16808 // Branch Instructions -- short offset versions
16809 //
16810 // These instructions are used to replace jumps of a long offset (the default
16811 // match) with jumps of a shorter offset. These instructions are all tagged
16812 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16813 // match rules in general matching. Instead, the ADLC generates a conversion
16814 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether a
// given branch fits the short form via the is_short_branch_offset()
// predicate in the machine-specific code section of this file.
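//
// Encoding arithmetic behind the replacement (standard x86 sizes): jmp
// rel32 is 5 bytes (E9 cd) and jcc rel32 is 6 bytes (0F 8x cd), while the
// short forms jmp rel8 (EB cb) and jcc rel8 (7x cb) are 2 bytes each,
// which is exactly what the size() attributes below claim.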
16818
16819 // Jump Direct - Label defines a relative address from JMP+1
16820 instruct jmpDir_short(label labl) %{
16821 match(Goto);
16822 effect(USE labl);
16823
16824 ins_cost(300);
16825 format %{ "jmp,s $labl" %}
16826 size(2);
16827 ins_encode %{
16828 Label* L = $labl$$label;
16829 __ jmpb(*L);
16830 %}
16831 ins_pipe(pipe_jmp);
16832 ins_short_branch(1);
16833 %}
16834
16835 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16836 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16837 match(If cop cr);
16838 effect(USE labl);
16839
16840 ins_cost(300);
16841 format %{ "j$cop,s $labl" %}
16842 size(2);
16843 ins_encode %{
16844 Label* L = $labl$$label;
16845 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16846 %}
16847 ins_pipe(pipe_jcc);
16848 ins_short_branch(1);
16849 %}
16850
16851 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16852 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16853 match(CountedLoopEnd cop cr);
16854 effect(USE labl);
16855
16856 ins_cost(300);
16857 format %{ "j$cop,s $labl\t# loop end" %}
16858 size(2);
16859 ins_encode %{
16860 Label* L = $labl$$label;
16861 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16862 %}
16863 ins_pipe(pipe_jcc);
16864 ins_short_branch(1);
16865 %}
16866
16867 // Jump Direct Conditional - using unsigned comparison
16868 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16869 match(If cop cmp);
16870 effect(USE labl);
16871
16872 ins_cost(300);
16873 format %{ "j$cop,us $labl" %}
16874 size(2);
16875 ins_encode %{
16876 Label* L = $labl$$label;
16877 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16878 %}
16879 ins_pipe(pipe_jcc);
16880 ins_short_branch(1);
16881 %}
16882
16883 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16884 match(If cop cmp);
16885 effect(USE labl);
16886
16887 ins_cost(300);
16888 format %{ "j$cop,us $labl" %}
16889 size(2);
16890 ins_encode %{
16891 Label* L = $labl$$label;
16892 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16893 %}
16894 ins_pipe(pipe_jcc);
16895 ins_short_branch(1);
16896 %}
16897
16898 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16899 match(If cop cmp);
16900 effect(USE labl);
16901
16902 ins_cost(300);
16903 format %{ $$template
16904 if ($cop$$cmpcode == Assembler::notEqual) {
16905 $$emit$$"jp,u,s $labl\n\t"
16906 $$emit$$"j$cop,u,s $labl"
16907 } else {
16908 $$emit$$"jp,u,s done\n\t"
16909 $$emit$$"j$cop,u,s $labl\n\t"
16910 $$emit$$"done:"
16911 }
16912 %}
16913 size(4);
16914 ins_encode %{
16915 Label* l = $labl$$label;
16916 if ($cop$$cmpcode == Assembler::notEqual) {
16917 __ jccb(Assembler::parity, *l);
16918 __ jccb(Assembler::notEqual, *l);
16919 } else if ($cop$$cmpcode == Assembler::equal) {
16920 Label done;
16921 __ jccb(Assembler::parity, done);
16922 __ jccb(Assembler::equal, *l);
16923 __ bind(done);
16924 } else {
16925 ShouldNotReachHere();
16926 }
16927 %}
16928 ins_pipe(pipe_jcc);
16929 ins_short_branch(1);
16930 %}
16931
16932 // ============================================================================
16933 // inlined locking and unlocking
16934
16935 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16936 match(Set cr (FastLock object box));
16937 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16938 ins_cost(300);
16939 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16940 ins_encode %{
16941 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16942 %}
16943 ins_pipe(pipe_slow);
16944 %}
16945
16946 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16947 match(Set cr (FastUnlock object rax_reg));
16948 effect(TEMP tmp, USE_KILL rax_reg);
16949 ins_cost(300);
16950 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16951 ins_encode %{
16952 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16953 %}
16954 ins_pipe(pipe_slow);
16955 %}
16956
16957
16958 // ============================================================================
16959 // Safepoint Instructions
16960 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16961 %{
16962 match(SafePoint poll);
16963 effect(KILL cr, USE poll);
16964
16965 format %{ "testl rax, [$poll]\t"
16966 "# Safepoint: poll for GC" %}
16967 ins_cost(125);
16968 ins_encode %{
16969 __ relocate(relocInfo::poll_type);
16970 address pre_pc = __ pc();
16971 __ testl(rax, Address($poll$$Register, 0));
16972 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16973 %}
16974 ins_pipe(ialu_reg_mem);
16975 %}
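
// The poll works by page protection rather than by testing a value: $poll
// holds the thread's polling-page address, and arming a safepoint makes
// that page unreadable, so the testl above traps and the signal handler
// resolves the safepoint. The relocation and the assert pin down the exact
// pc that the handler must recognize as a poll instruction.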
16976
16977 instruct mask_all_evexL(kReg dst, rRegL src) %{
16978 match(Set dst (MaskAll src));
16979 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
16980 ins_encode %{
16981 int mask_len = Matcher::vector_length(this);
16982 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
16983 %}
16984 ins_pipe( pipe_slow );
16985 %}
16986
16987 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
16988 predicate(Matcher::vector_length(n) > 32);
16989 match(Set dst (MaskAll src));
16990 effect(TEMP tmp);
16991 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
16992 ins_encode %{
16993 int mask_len = Matcher::vector_length(this);
16994 __ movslq($tmp$$Register, $src$$Register);
16995 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
16996 %}
16997 ins_pipe( pipe_slow );
16998 %}
16999
17000 // ============================================================================
17001 // Procedure Call/Return Instructions
17002 // Call Java Static Instruction
17003 // Note: If this code changes, the corresponding ret_addr_offset() and
17004 // compute_padding() functions will have to be adjusted.
17005 instruct CallStaticJavaDirect(method meth) %{
17006 match(CallStaticJava);
17007 effect(USE meth);
17008
17009 ins_cost(300);
17010 format %{ "call,static " %}
17011 opcode(0xE8); /* E8 cd */
17012 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17013 ins_pipe(pipe_slow);
17014 ins_alignment(4);
17015 %}
17016
17017 // Call Java Dynamic Instruction
17018 // Note: If this code changes, the corresponding ret_addr_offset() and
17019 // compute_padding() functions will have to be adjusted.
17020 instruct CallDynamicJavaDirect(method meth)
17021 %{
17022 match(CallDynamicJava);
17023 effect(USE meth);
17024
17025 ins_cost(300);
17026 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17027 "call,dynamic " %}
17028 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17029 ins_pipe(pipe_slow);
17030 ins_alignment(4);
17031 %}
17032
17033 // Call Runtime Instruction
17034 instruct CallRuntimeDirect(method meth)
17035 %{
17036 match(CallRuntime);
17037 effect(USE meth);
17038
17039 ins_cost(300);
17040 format %{ "call,runtime " %}
17041 ins_encode(clear_avx, Java_To_Runtime(meth));
17042 ins_pipe(pipe_slow);
17043 %}
17044
17045 // Call runtime without safepoint
17046 instruct CallLeafDirect(method meth)
17047 %{
17048 match(CallLeaf);
17049 effect(USE meth);
17050
17051 ins_cost(300);
17052 format %{ "call_leaf,runtime " %}
17053 ins_encode(clear_avx, Java_To_Runtime(meth));
17054 ins_pipe(pipe_slow);
17055 %}
17056
17057 // Call runtime without safepoint and with vector arguments
17058 instruct CallLeafDirectVector(method meth)
17059 %{
17060 match(CallLeafVector);
17061 effect(USE meth);
17062
17063 ins_cost(300);
17064 format %{ "call_leaf,vector " %}
17065 ins_encode(Java_To_Runtime(meth));
17066 ins_pipe(pipe_slow);
17067 %}
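
// Note: unlike the other call rules, this one omits clear_avx (vzeroupper),
// since vector arguments are passed in the full-width AVX registers and
// their upper portions must survive up to the call.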
17068
17069 // Call runtime without safepoint
17070 instruct CallLeafNoFPDirect(method meth)
17071 %{
17072 match(CallLeafNoFP);
17073 effect(USE meth);
17074
17075 ins_cost(300);
17076 format %{ "call_leaf_nofp,runtime " %}
17077 ins_encode(clear_avx, Java_To_Runtime(meth));
17078 ins_pipe(pipe_slow);
17079 %}
17080
17081 // Return Instruction
17082 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17085 instruct Ret()
17086 %{
17087 match(Return);
17088
17089 format %{ "ret" %}
17090 ins_encode %{
17091 __ ret(0);
17092 %}
17093 ins_pipe(pipe_jmp);
17094 %}
17095
17096 // Tail Call; Jump from runtime stub to Java code.
17097 // Also known as an 'interprocedural jump'.
17098 // Target of jump will eventually return to caller.
17099 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, and it resets rbp to the caller's state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_encode %{
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Forward exception.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler. No code emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}

// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}

instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(vec dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance. Therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
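// (sqrtss writes only the low 32 bits of dst and preserves the rest, so a
// stale value in dst would otherwise create a false dependency.)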
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtsd instruction likewise needs its destination register to be
// pre-initialized, so only the dst-preloaded rule is defined below.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
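    // Enable only lane 0 in the opmask so the masked evcvtps2ph below stores
    // just one 16-bit half-float to memory.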
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- VectorReinterpret ------------------------------------
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have equal size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have equal size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have equal size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
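    // Load a mask with all-ones in the low src-size bytes and AND it with src,
    // zero-extending src into the wider dst.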
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}


instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this)) {
      case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------------------------------------------------------------------------------------

instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
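      // Clear dst first to break the false dependency: roundsd writes only the
      // low 64 bits and preserves the rest of dst.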
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}

instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// ============================================================================

// Load vectors generic operand pattern
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors generic operand pattern.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- Gather ------------------------------------

// Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE

instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
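    // vpcmpeqd of a register with itself sets every lane to all-ones, giving an
    // all-true gather mask.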
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
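    // kxnor of a mask register with itself sets all mask bits: an all-true mask.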
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
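    // vpmovmskb yields one mask bit per byte; for short elements keep every
    // other bit (pext with 0x55555555) so there is one bit per element.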
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
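    // As above, compress the per-byte mask to one bit per short element.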
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
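      // SSE: duplicate the byte into both bytes of a word, splat that word
      // across the low 64 bits, then duplicate the low qword for 16-byte vectors.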
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateS=======================================

instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
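      // SSE: splat the low word across the low 64 bits, then duplicate the low
      // qword for 8-element vectors.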
18548 __ movdl($dst$$XMMRegister, $src$$Register);
18549 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18550 if (vlen >= 8) {
18551 assert(vlen == 8, "");
18552 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18553 }
18554 }
18555 %}
18556 ins_pipe( pipe_slow );
18557 %}
18558
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateI=======================================

instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
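    // The immediate is pre-replicated in the constant table so that a single
    // chunk load can seed the broadcast: 4 bytes with AVX (vbroadcastss /
    // vpbroadcastd), 8 bytes with SSE3 only (movddup), or a full 16 bytes on
    // plain SSE2. Dividing by the element size gives the copy count.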
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                           type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero into a vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
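    // Without AVX512VL an EVEX-only target cannot encode the 128-bit form, so
    // zero the full register with vpxor at the actual vector length. Otherwise
    // the 128-bit pxor suffices: HotSpot emits it VEX-encoded once AVX is on,
    // which also clears the destination's upper bits.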
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

// Replicate long (8-byte) scalar into a vector
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8-byte) scalar immediate into a vector by loading from the constant table.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
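    // One 8-byte copy in the table is enough with SSE3 (movddup broadcasts
    // it); otherwise store two copies so a plain 128-bit load works.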
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
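    // pshufd 0x00 selects element 0 for all four dword lanes, broadcasting
    // the scalar across the 128-bit register.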
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float scalar immediate into a vector by loading from the constant table.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8-byte) scalar into a vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
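    // pshufd 0x44 picks dwords {0,1,0,1}, i.e. it duplicates the low quadword
    // (no movddup before SSE3).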
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8-byte) scalar immediate into a vector by loading from the constant table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

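    // Split $idx into a position within a 128-bit lane (x_idx) and the lane
    // number (y_idx); extract that lane, insert the scalar, and write the
    // lane back.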
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

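    // Same lane-split scheme as insert32, but across four 128-bit lanes.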
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

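    // insertps imm8: bits 5:4 select the destination lane; element 0 of $val
    // is the source and no lanes are zeroed.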
    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

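    // There is no insertpd; bounce the double's bit pattern through a GPR and
    // use pinsrq instead.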
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REDUCTION ARITHMETIC=======================================

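// Each reduction folds a vector operand into a scalar. src1 supplies the
// scalar input (an accumulator, or the operation's identity for the unordered
// VectorAPI forms) and src2 the vector; the vtmp registers hold intermediate
// shuffles while lanes are combined.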
// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

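// Two variants: with AVX512DQ the reduction (notably the vpmullq multiply)
// stays EVEX-encodable, so any vector register (vec) may be used; without it
// the fallback sequence relies on legacy encodings and is restricted to the
// legacy register range (legVec).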
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Float Reduction==========================================

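// requires_strict_order() separates auto-vectorized reductions, which must
// combine lanes in source order to preserve Java's floating-point semantics,
// from VectorAPI reductions, which may reassociate freely (the unordered
// rules further below).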
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Mul Reduction=====================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
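// These rules match only when src1 is the identity for the operation
// (+Infinity for min, -Infinity for max, enforced by the predicate), so the
// scalar input can be ignored and the reduction computed from src2 alone.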
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

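// _av variants: the scalar input is a live accumulated value (in dst) rather
// than the identity, so it participates in the min/max.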
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

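// AVX10.2 variants: direct vector min/max instructions implement the required
// NaN and signed-zero semantics, so these rules pass xnoreg for the compare
// temporaries and do not kill the flags register.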
instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

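// Each element type gets three forms: a two-operand SSE form (dst op= src),
// a three-operand AVX register form, and an AVX form with a memory operand
// (only for vectors wider than 8 bytes, see the predicates).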
19790 // Bytes vector add
19791 instruct vaddB(vec dst, vec src) %{
19792 predicate(UseAVX == 0);
19793 match(Set dst (AddVB dst src));
19794 format %{ "paddb $dst,$src\t! add packedB" %}
19795 ins_encode %{
19796 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19797 %}
19798 ins_pipe( pipe_slow );
19799 %}
19800
19801 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19802 predicate(UseAVX > 0);
19803 match(Set dst (AddVB src1 src2));
19804 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19805 ins_encode %{
19806 int vlen_enc = vector_length_encoding(this);
19807 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19808 %}
19809 ins_pipe( pipe_slow );
19810 %}
19811
19812 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19813 predicate((UseAVX > 0) &&
19814 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19815 match(Set dst (AddVB src (LoadVector mem)));
19816 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19817 ins_encode %{
19818 int vlen_enc = vector_length_encoding(this);
19819 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19820 %}
19821 ins_pipe( pipe_slow );
19822 %}
19823
19824 // Shorts/Chars vector add
19825 instruct vaddS(vec dst, vec src) %{
19826 predicate(UseAVX == 0);
19827 match(Set dst (AddVS dst src));
19828 format %{ "paddw $dst,$src\t! add packedS" %}
19829 ins_encode %{
19830 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19831 %}
19832 ins_pipe( pipe_slow );
19833 %}
19834
19835 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19836 predicate(UseAVX > 0);
19837 match(Set dst (AddVS src1 src2));
19838 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19839 ins_encode %{
19840 int vlen_enc = vector_length_encoding(this);
19841 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19842 %}
19843 ins_pipe( pipe_slow );
19844 %}
19845
19846 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19847 predicate((UseAVX > 0) &&
19848 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19849 match(Set dst (AddVS src (LoadVector mem)));
19850 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19851 ins_encode %{
19852 int vlen_enc = vector_length_encoding(this);
19853 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19854 %}
19855 ins_pipe( pipe_slow );
19856 %}
19857
19858 // Integers vector add
19859 instruct vaddI(vec dst, vec src) %{
19860 predicate(UseAVX == 0);
19861 match(Set dst (AddVI dst src));
19862 format %{ "paddd $dst,$src\t! add packedI" %}
19863 ins_encode %{
19864 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19865 %}
19866 ins_pipe( pipe_slow );
19867 %}
19868
19869 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19870 predicate(UseAVX > 0);
19871 match(Set dst (AddVI src1 src2));
19872 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19873 ins_encode %{
19874 int vlen_enc = vector_length_encoding(this);
19875 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19876 %}
19877 ins_pipe( pipe_slow );
19878 %}
19879
19881 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19882 predicate((UseAVX > 0) &&
19883 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19884 match(Set dst (AddVI src (LoadVector mem)));
19885 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19886 ins_encode %{
19887 int vlen_enc = vector_length_encoding(this);
19888 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19889 %}
19890 ins_pipe( pipe_slow );
19891 %}
19892
19893 // Longs vector add
19894 instruct vaddL(vec dst, vec src) %{
19895 predicate(UseAVX == 0);
19896 match(Set dst (AddVL dst src));
19897 format %{ "paddq $dst,$src\t! add packedL" %}
19898 ins_encode %{
19899 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19900 %}
19901 ins_pipe( pipe_slow );
19902 %}
19903
19904 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19905 predicate(UseAVX > 0);
19906 match(Set dst (AddVL src1 src2));
19907 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19908 ins_encode %{
19909 int vlen_enc = vector_length_encoding(this);
19910 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19911 %}
19912 ins_pipe( pipe_slow );
19913 %}
19914
19915 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19916 predicate((UseAVX > 0) &&
19917 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19918 match(Set dst (AddVL src (LoadVector mem)));
19919 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19920 ins_encode %{
19921 int vlen_enc = vector_length_encoding(this);
19922 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19923 %}
19924 ins_pipe( pipe_slow );
19925 %}
19926
19927 // Floats vector add
19928 instruct vaddF(vec dst, vec src) %{
19929 predicate(UseAVX == 0);
19930 match(Set dst (AddVF dst src));
19931 format %{ "addps $dst,$src\t! add packedF" %}
19932 ins_encode %{
19933 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19934 %}
19935 ins_pipe( pipe_slow );
19936 %}
19937
19938 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19939 predicate(UseAVX > 0);
19940 match(Set dst (AddVF src1 src2));
19941 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19942 ins_encode %{
19943 int vlen_enc = vector_length_encoding(this);
19944 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19945 %}
19946 ins_pipe( pipe_slow );
19947 %}
19948
19949 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19950 predicate((UseAVX > 0) &&
19951 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19952 match(Set dst (AddVF src (LoadVector mem)));
19953 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
19954 ins_encode %{
19955 int vlen_enc = vector_length_encoding(this);
19956 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19957 %}
19958 ins_pipe( pipe_slow );
19959 %}
19960
19961 // Doubles vector add
19962 instruct vaddD(vec dst, vec src) %{
19963 predicate(UseAVX == 0);
19964 match(Set dst (AddVD dst src));
19965 format %{ "addpd $dst,$src\t! add packedD" %}
19966 ins_encode %{
19967 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19968 %}
19969 ins_pipe( pipe_slow );
19970 %}
19971
19972 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19973 predicate(UseAVX > 0);
19974 match(Set dst (AddVD src1 src2));
19975 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
19976 ins_encode %{
19977 int vlen_enc = vector_length_encoding(this);
19978 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19979 %}
19980 ins_pipe( pipe_slow );
19981 %}
19982
19983 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19984 predicate((UseAVX > 0) &&
19985 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19986 match(Set dst (AddVD src (LoadVector mem)));
19987 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
19988 ins_encode %{
19989 int vlen_enc = vector_length_encoding(this);
19990 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19991 %}
19992 ins_pipe( pipe_slow );
19993 %}
19994
19995 // --------------------------------- SUB --------------------------------------
19996
19997 // Bytes vector sub
19998 instruct vsubB(vec dst, vec src) %{
19999 predicate(UseAVX == 0);
20000 match(Set dst (SubVB dst src));
20001 format %{ "psubb $dst,$src\t! sub packedB" %}
20002 ins_encode %{
20003 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20004 %}
20005 ins_pipe( pipe_slow );
20006 %}
20007
20008 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20009 predicate(UseAVX > 0);
20010 match(Set dst (SubVB src1 src2));
20011 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20012 ins_encode %{
20013 int vlen_enc = vector_length_encoding(this);
20014 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20015 %}
20016 ins_pipe( pipe_slow );
20017 %}
20018
20019 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20020 predicate((UseAVX > 0) &&
20021 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20022 match(Set dst (SubVB src (LoadVector mem)));
20023 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20024 ins_encode %{
20025 int vlen_enc = vector_length_encoding(this);
20026 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20027 %}
20028 ins_pipe( pipe_slow );
20029 %}
20030
20031 // Shorts/Chars vector sub
20032 instruct vsubS(vec dst, vec src) %{
20033 predicate(UseAVX == 0);
20034 match(Set dst (SubVS dst src));
20035 format %{ "psubw $dst,$src\t! sub packedS" %}
20036 ins_encode %{
20037 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20038 %}
20039 ins_pipe( pipe_slow );
20040 %}
20041
20043 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20044 predicate(UseAVX > 0);
20045 match(Set dst (SubVS src1 src2));
20046 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20047 ins_encode %{
20048 int vlen_enc = vector_length_encoding(this);
20049 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20050 %}
20051 ins_pipe( pipe_slow );
20052 %}
20053
20054 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20055 predicate((UseAVX > 0) &&
20056 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20057 match(Set dst (SubVS src (LoadVector mem)));
20058 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20059 ins_encode %{
20060 int vlen_enc = vector_length_encoding(this);
20061 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20062 %}
20063 ins_pipe( pipe_slow );
20064 %}
20065
20066 // Integers vector sub
20067 instruct vsubI(vec dst, vec src) %{
20068 predicate(UseAVX == 0);
20069 match(Set dst (SubVI dst src));
20070 format %{ "psubd $dst,$src\t! sub packedI" %}
20071 ins_encode %{
20072 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20073 %}
20074 ins_pipe( pipe_slow );
20075 %}
20076
20077 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20078 predicate(UseAVX > 0);
20079 match(Set dst (SubVI src1 src2));
20080 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20081 ins_encode %{
20082 int vlen_enc = vector_length_encoding(this);
20083 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20084 %}
20085 ins_pipe( pipe_slow );
20086 %}
20087
20088 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20089 predicate((UseAVX > 0) &&
20090 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20091 match(Set dst (SubVI src (LoadVector mem)));
20092 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20093 ins_encode %{
20094 int vlen_enc = vector_length_encoding(this);
20095 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20096 %}
20097 ins_pipe( pipe_slow );
20098 %}
20099
20100 // Longs vector sub
20101 instruct vsubL(vec dst, vec src) %{
20102 predicate(UseAVX == 0);
20103 match(Set dst (SubVL dst src));
20104 format %{ "psubq $dst,$src\t! sub packedL" %}
20105 ins_encode %{
20106 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20107 %}
20108 ins_pipe( pipe_slow );
20109 %}
20110
20111 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20112 predicate(UseAVX > 0);
20113 match(Set dst (SubVL src1 src2));
20114 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20115 ins_encode %{
20116 int vlen_enc = vector_length_encoding(this);
20117 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20118 %}
20119 ins_pipe( pipe_slow );
20120 %}
20121
20123 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20124 predicate((UseAVX > 0) &&
20125 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20126 match(Set dst (SubVL src (LoadVector mem)));
20127 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20128 ins_encode %{
20129 int vlen_enc = vector_length_encoding(this);
20130 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20131 %}
20132 ins_pipe( pipe_slow );
20133 %}
20134
20135 // Floats vector sub
20136 instruct vsubF(vec dst, vec src) %{
20137 predicate(UseAVX == 0);
20138 match(Set dst (SubVF dst src));
20139 format %{ "subps $dst,$src\t! sub packedF" %}
20140 ins_encode %{
20141 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20142 %}
20143 ins_pipe( pipe_slow );
20144 %}
20145
20146 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20147 predicate(UseAVX > 0);
20148 match(Set dst (SubVF src1 src2));
20149 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20150 ins_encode %{
20151 int vlen_enc = vector_length_encoding(this);
20152 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20153 %}
20154 ins_pipe( pipe_slow );
20155 %}
20156
20157 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20158 predicate((UseAVX > 0) &&
20159 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20160 match(Set dst (SubVF src (LoadVector mem)));
20161 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20162 ins_encode %{
20163 int vlen_enc = vector_length_encoding(this);
20164 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20165 %}
20166 ins_pipe( pipe_slow );
20167 %}
20168
20169 // Doubles vector sub
20170 instruct vsubD(vec dst, vec src) %{
20171 predicate(UseAVX == 0);
20172 match(Set dst (SubVD dst src));
20173 format %{ "subpd $dst,$src\t! sub packedD" %}
20174 ins_encode %{
20175 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20176 %}
20177 ins_pipe( pipe_slow );
20178 %}
20179
20180 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20181 predicate(UseAVX > 0);
20182 match(Set dst (SubVD src1 src2));
20183 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20184 ins_encode %{
20185 int vlen_enc = vector_length_encoding(this);
20186 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20187 %}
20188 ins_pipe( pipe_slow );
20189 %}
20190
20191 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20192 predicate((UseAVX > 0) &&
20193 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20194 match(Set dst (SubVD src (LoadVector mem)));
20195 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20196 ins_encode %{
20197 int vlen_enc = vector_length_encoding(this);
20198 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20199 %}
20200 ins_pipe( pipe_slow );
20201 %}
20202
20203 // --------------------------------- MUL --------------------------------------
20204
20205 // Byte vector mul
20206 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20207 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20208 match(Set dst (MulVB src1 src2));
20209 effect(TEMP dst, TEMP xtmp);
20210 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20211 ins_encode %{
20212 assert(UseSSE > 3, "required");
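    // Bytes have no native multiply: sign-extend both inputs to words,
    // multiply as words, clear the high byte of each 16-bit product with the
    // psllw/psrlw pair, then pack the words back down to bytes.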
20213 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20214 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20215 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20216 __ psllw($dst$$XMMRegister, 8);
20217 __ psrlw($dst$$XMMRegister, 8);
20218 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20219 %}
20220 ins_pipe( pipe_slow );
20221 %}
20222
20223 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20224 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20225 match(Set dst (MulVB src1 src2));
20226 effect(TEMP dst, TEMP xtmp);
20227 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20228 ins_encode %{
20229 assert(UseSSE > 3, "required");
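    // Sixteen bytes widen to more words than one xmm register can hold, so
    // multiply the odd and even byte lanes as separate word vectors and merge
    // the two half-results.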
20230 // Odd-index elements
20231 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20232 __ psrlw($dst$$XMMRegister, 8);
20233 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20234 __ psrlw($xtmp$$XMMRegister, 8);
20235 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20236 __ psllw($dst$$XMMRegister, 8);
20237 // Even-index elements
20238 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20239 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20240 __ psllw($xtmp$$XMMRegister, 8);
20241 __ psrlw($xtmp$$XMMRegister, 8);
20242 // Combine
20243 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20244 %}
20245 ins_pipe( pipe_slow );
20246 %}
20247
20248 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20249 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20250 match(Set dst (MulVB src1 src2));
20251 effect(TEMP xtmp1, TEMP xtmp2);
20252 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20253 ins_encode %{
20254 int vlen_enc = vector_length_encoding(this);
20255 // Odd-index elements
20256 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20257 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20258 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20259 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20260 // Even-index elements
20261 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20262 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20263 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20264 // Combine
20265 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20266 %}
20267 ins_pipe( pipe_slow );
20268 %}
20269
20270 // Shorts/Chars vector mul
20271 instruct vmulS(vec dst, vec src) %{
20272 predicate(UseAVX == 0);
20273 match(Set dst (MulVS dst src));
20274 format %{ "pmullw $dst,$src\t! mul packedS" %}
20275 ins_encode %{
20276 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20277 %}
20278 ins_pipe( pipe_slow );
20279 %}
20280
20281 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20282 predicate(UseAVX > 0);
20283 match(Set dst (MulVS src1 src2));
20284 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20285 ins_encode %{
20286 int vlen_enc = vector_length_encoding(this);
20287 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20288 %}
20289 ins_pipe( pipe_slow );
20290 %}
20291
20292 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20293 predicate((UseAVX > 0) &&
20294 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20295 match(Set dst (MulVS src (LoadVector mem)));
20296 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20297 ins_encode %{
20298 int vlen_enc = vector_length_encoding(this);
20299 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20300 %}
20301 ins_pipe( pipe_slow );
20302 %}
20303
20304 // Integers vector mul
20305 instruct vmulI(vec dst, vec src) %{
20306 predicate(UseAVX == 0);
20307 match(Set dst (MulVI dst src));
20308 format %{ "pmulld $dst,$src\t! mul packedI" %}
20309 ins_encode %{
20310 assert(UseSSE > 3, "required");
20311 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20312 %}
20313 ins_pipe( pipe_slow );
20314 %}
20315
20316 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20317 predicate(UseAVX > 0);
20318 match(Set dst (MulVI src1 src2));
20319 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20320 ins_encode %{
20321 int vlen_enc = vector_length_encoding(this);
20322 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20323 %}
20324 ins_pipe( pipe_slow );
20325 %}
20326
20327 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20328 predicate((UseAVX > 0) &&
20329 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20330 match(Set dst (MulVI src (LoadVector mem)));
20331 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20332 ins_encode %{
20333 int vlen_enc = vector_length_encoding(this);
20334 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20335 %}
20336 ins_pipe( pipe_slow );
20337 %}
20338
20339 // Longs vector mul
20340 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20341 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20342 VM_Version::supports_avx512dq()) ||
20343 VM_Version::supports_avx512vldq());
20344 match(Set dst (MulVL src1 src2));
20345 ins_cost(500);
20346 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20347 ins_encode %{
20348 assert(UseAVX > 2, "required");
20349 int vlen_enc = vector_length_encoding(this);
20350 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20351 %}
20352 ins_pipe( pipe_slow );
20353 %}
20354
20355 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20356 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20357 VM_Version::supports_avx512dq()) ||
20358 (Matcher::vector_length_in_bytes(n) > 8 &&
20359 VM_Version::supports_avx512vldq()));
20360 match(Set dst (MulVL src (LoadVector mem)));
20361 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20362 ins_cost(500);
20363 ins_encode %{
20364 assert(UseAVX > 2, "required");
20365 int vlen_enc = vector_length_encoding(this);
20366 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20367 %}
20368 ins_pipe( pipe_slow );
20369 %}
20370
20371 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20372 predicate(UseAVX == 0);
20373 match(Set dst (MulVL src1 src2));
20374 ins_cost(500);
20375 effect(TEMP dst, TEMP xtmp);
20376 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20377 ins_encode %{
20378 assert(VM_Version::supports_sse4_1(), "required");
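    // A sketch of the decomposition used here: writing each long as hi:lo
    // 32-bit halves, a*b mod 2^64 == ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo.
    // pshufd(0xB1) swaps the halves within each long so pmulld can form the
    // two cross products, and pmuludq supplies the unsigned a_lo*b_lo term.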
    // Get the lo-hi cross products; only their lower 32 bits are of concern
20380 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20381 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20382 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20383 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20384 __ psllq($dst$$XMMRegister, 32);
20385 // Get the lo-lo products
20386 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20387 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20388 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20389 %}
20390 ins_pipe( pipe_slow );
20391 %}
20392
20393 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20394 predicate(UseAVX > 0 &&
20395 ((Matcher::vector_length_in_bytes(n) == 64 &&
20396 !VM_Version::supports_avx512dq()) ||
20397 (Matcher::vector_length_in_bytes(n) < 64 &&
20398 !VM_Version::supports_avx512vldq())));
20399 match(Set dst (MulVL src1 src2));
20400 effect(TEMP xtmp1, TEMP xtmp2);
20401 ins_cost(500);
20402 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20403 ins_encode %{
20404 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of concern
20406 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20407 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20408 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20409 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20410 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20411 // Get the lo-lo products
20412 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20413 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20414 %}
20415 ins_pipe( pipe_slow );
20416 %}
20417
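// When both MulVL inputs are known to be zero- or sign-extended 32-bit values
// (has_uint_inputs()/has_int_inputs()), a single widening 32x32->64 multiply
// (vpmuludq/vpmuldq) replaces the multi-instruction sequences above, hence the
// lower ins_cost.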
20418 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20419 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20420 match(Set dst (MulVL src1 src2));
20421 ins_cost(100);
20422 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20423 ins_encode %{
20424 int vlen_enc = vector_length_encoding(this);
20425 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20426 %}
20427 ins_pipe( pipe_slow );
20428 %}
20429
20430 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20431 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20432 match(Set dst (MulVL src1 src2));
20433 ins_cost(100);
20434 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20435 ins_encode %{
20436 int vlen_enc = vector_length_encoding(this);
20437 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20438 %}
20439 ins_pipe( pipe_slow );
20440 %}
20441
20442 // Floats vector mul
20443 instruct vmulF(vec dst, vec src) %{
20444 predicate(UseAVX == 0);
20445 match(Set dst (MulVF dst src));
20446 format %{ "mulps $dst,$src\t! mul packedF" %}
20447 ins_encode %{
20448 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20449 %}
20450 ins_pipe( pipe_slow );
20451 %}
20452
20453 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20454 predicate(UseAVX > 0);
20455 match(Set dst (MulVF src1 src2));
20456 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20457 ins_encode %{
20458 int vlen_enc = vector_length_encoding(this);
20459 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20460 %}
20461 ins_pipe( pipe_slow );
20462 %}
20463
20464 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20465 predicate((UseAVX > 0) &&
20466 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20467 match(Set dst (MulVF src (LoadVector mem)));
20468 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20469 ins_encode %{
20470 int vlen_enc = vector_length_encoding(this);
20471 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20472 %}
20473 ins_pipe( pipe_slow );
20474 %}
20475
20476 // Doubles vector mul
20477 instruct vmulD(vec dst, vec src) %{
20478 predicate(UseAVX == 0);
20479 match(Set dst (MulVD dst src));
20480 format %{ "mulpd $dst,$src\t! mul packedD" %}
20481 ins_encode %{
20482 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20483 %}
20484 ins_pipe( pipe_slow );
20485 %}
20486
20487 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20488 predicate(UseAVX > 0);
20489 match(Set dst (MulVD src1 src2));
20490 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20491 ins_encode %{
20492 int vlen_enc = vector_length_encoding(this);
20493 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20494 %}
20495 ins_pipe( pipe_slow );
20496 %}
20497
20498 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20499 predicate((UseAVX > 0) &&
20500 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20501 match(Set dst (MulVD src (LoadVector mem)));
20502 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20503 ins_encode %{
20504 int vlen_enc = vector_length_encoding(this);
20505 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20506 %}
20507 ins_pipe( pipe_slow );
20508 %}
20509
20510 // --------------------------------- DIV --------------------------------------
20511
20512 // Floats vector div
20513 instruct vdivF(vec dst, vec src) %{
20514 predicate(UseAVX == 0);
20515 match(Set dst (DivVF dst src));
20516 format %{ "divps $dst,$src\t! div packedF" %}
20517 ins_encode %{
20518 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20519 %}
20520 ins_pipe( pipe_slow );
20521 %}
20522
20523 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20524 predicate(UseAVX > 0);
20525 match(Set dst (DivVF src1 src2));
20526 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20527 ins_encode %{
20528 int vlen_enc = vector_length_encoding(this);
20529 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20530 %}
20531 ins_pipe( pipe_slow );
20532 %}
20533
20534 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20535 predicate((UseAVX > 0) &&
20536 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20537 match(Set dst (DivVF src (LoadVector mem)));
20538 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20539 ins_encode %{
20540 int vlen_enc = vector_length_encoding(this);
20541 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20542 %}
20543 ins_pipe( pipe_slow );
20544 %}
20545
20546 // Doubles vector div
20547 instruct vdivD(vec dst, vec src) %{
20548 predicate(UseAVX == 0);
20549 match(Set dst (DivVD dst src));
20550 format %{ "divpd $dst,$src\t! div packedD" %}
20551 ins_encode %{
20552 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20553 %}
20554 ins_pipe( pipe_slow );
20555 %}
20556
20557 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20558 predicate(UseAVX > 0);
20559 match(Set dst (DivVD src1 src2));
20560 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20561 ins_encode %{
20562 int vlen_enc = vector_length_encoding(this);
20563 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20564 %}
20565 ins_pipe( pipe_slow );
20566 %}
20567
20568 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20569 predicate((UseAVX > 0) &&
20570 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20571 match(Set dst (DivVD src (LoadVector mem)));
20572 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20573 ins_encode %{
20574 int vlen_enc = vector_length_encoding(this);
20575 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20576 %}
20577 ins_pipe( pipe_slow );
20578 %}
20579
20580 // ------------------------------ MinMax ---------------------------------------
20581
20582 // Byte, Short, Int vector Min/Max
20583 instruct minmax_reg_sse(vec dst, vec src) %{
20584 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20585 UseAVX == 0);
20586 match(Set dst (MinV dst src));
20587 match(Set dst (MaxV dst src));
20588 format %{ "vector_minmax $dst,$src\t! " %}
20589 ins_encode %{
20590 assert(UseSSE >= 4, "required");
20591
20592 int opcode = this->ideal_Opcode();
20593 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20594 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20595 %}
20596 ins_pipe( pipe_slow );
20597 %}
20598
20599 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20600 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20601 UseAVX > 0);
20602 match(Set dst (MinV src1 src2));
20603 match(Set dst (MaxV src1 src2));
20604 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20605 ins_encode %{
20606 int opcode = this->ideal_Opcode();
20607 int vlen_enc = vector_length_encoding(this);
20608 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20609
20610 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20611 %}
20612 ins_pipe( pipe_slow );
20613 %}
20614
20615 // Long vector Min/Max
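// SSE lacks 64-bit pmin/pmax, so the helper falls back to a compare-and-blend
// sequence; SSE4.1 blendv* takes its selector implicitly in xmm0, which is
// presumably why the temp below is pinned to rxmm0.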
20616 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20617 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20618 UseAVX == 0);
20619 match(Set dst (MinV dst src));
20620 match(Set dst (MaxV src dst));
20621 effect(TEMP dst, TEMP tmp);
20622 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20623 ins_encode %{
20624 assert(UseSSE >= 4, "required");
20625
20626 int opcode = this->ideal_Opcode();
20627 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20628 assert(elem_bt == T_LONG, "sanity");
20629
20630 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20631 %}
20632 ins_pipe( pipe_slow );
20633 %}
20634
20635 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20636 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20637 UseAVX > 0 && !VM_Version::supports_avx512vl());
20638 match(Set dst (MinV src1 src2));
20639 match(Set dst (MaxV src1 src2));
20640 effect(TEMP dst);
20641 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20642 ins_encode %{
20643 int vlen_enc = vector_length_encoding(this);
20644 int opcode = this->ideal_Opcode();
20645 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20646 assert(elem_bt == T_LONG, "sanity");
20647
20648 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20649 %}
20650 ins_pipe( pipe_slow );
20651 %}
20652
20653 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20654 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20655 Matcher::vector_element_basic_type(n) == T_LONG);
20656 match(Set dst (MinV src1 src2));
20657 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20659 ins_encode %{
20660 assert(UseAVX > 2, "required");
20661
20662 int vlen_enc = vector_length_encoding(this);
20663 int opcode = this->ideal_Opcode();
20664 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20665 assert(elem_bt == T_LONG, "sanity");
20666
20667 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20668 %}
20669 ins_pipe( pipe_slow );
20670 %}
20671
20672 // Float/Double vector Min/Max
20673 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20674 predicate(VM_Version::supports_avx10_2() &&
20675 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20676 match(Set dst (MinV a b));
20677 match(Set dst (MaxV a b));
20678 format %{ "vector_minmaxFP $dst, $a, $b" %}
20679 ins_encode %{
20680 int vlen_enc = vector_length_encoding(this);
20681 int opcode = this->ideal_Opcode();
20682 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20683 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20684 %}
20685 ins_pipe( pipe_slow );
20686 %}
20687
20688 // Float/Double vector Min/Max
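// Note that vminps/vmaxps alone don't match Java semantics: they return the
// second operand when an input is NaN and treat -0.0 as equal to +0.0, while
// Java Math.min/max must propagate NaN and order -0.0 below +0.0. The extra
// temporaries feed the fix-up sequence in vminmax_fp.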
20689 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20690 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20691 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20692 UseAVX > 0);
20693 match(Set dst (MinV a b));
20694 match(Set dst (MaxV a b));
20695 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20696 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20697 ins_encode %{
20698 assert(UseAVX > 0, "required");
20699
20700 int opcode = this->ideal_Opcode();
20701 int vlen_enc = vector_length_encoding(this);
20702 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20703
20704 __ vminmax_fp(opcode, elem_bt,
20705 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20707 %}
20708 ins_pipe( pipe_slow );
20709 %}
20710
20711 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20712 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20713 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20714 match(Set dst (MinV a b));
20715 match(Set dst (MaxV a b));
20716 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20717 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20718 ins_encode %{
20719 assert(UseAVX > 2, "required");
20720
20721 int opcode = this->ideal_Opcode();
20722 int vlen_enc = vector_length_encoding(this);
20723 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20724
20725 __ evminmax_fp(opcode, elem_bt,
20726 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20728 %}
20729 ins_pipe( pipe_slow );
20730 %}
20731
20732 // ------------------------------ Unsigned vector Min/Max ----------------------
20733
20734 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20735 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20736 match(Set dst (UMinV a b));
20737 match(Set dst (UMaxV a b));
20738 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20739 ins_encode %{
20740 int opcode = this->ideal_Opcode();
20741 int vlen_enc = vector_length_encoding(this);
20742 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20743 assert(is_integral_type(elem_bt), "");
20744 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20745 %}
20746 ins_pipe( pipe_slow );
20747 %}
20748
20749 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20750 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20751 match(Set dst (UMinV a (LoadVector b)));
20752 match(Set dst (UMaxV a (LoadVector b)));
20753 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20754 ins_encode %{
20755 int opcode = this->ideal_Opcode();
20756 int vlen_enc = vector_length_encoding(this);
20757 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20758 assert(is_integral_type(elem_bt), "");
20759 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20760 %}
20761 ins_pipe( pipe_slow );
20762 %}
20763
20764 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20765 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20766 match(Set dst (UMinV a b));
20767 match(Set dst (UMaxV a b));
20768 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20770 ins_encode %{
20771 int opcode = this->ideal_Opcode();
20772 int vlen_enc = vector_length_encoding(this);
20773 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20774 %}
20775 ins_pipe( pipe_slow );
20776 %}
20777
20778 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20779 match(Set dst (UMinV (Binary dst src2) mask));
20780 match(Set dst (UMaxV (Binary dst src2) mask));
20781 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20782 ins_encode %{
20783 int vlen_enc = vector_length_encoding(this);
20784 BasicType bt = Matcher::vector_element_basic_type(this);
20785 int opc = this->ideal_Opcode();
20786 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20787 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20788 %}
20789 ins_pipe( pipe_slow );
20790 %}
20791
20792 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20793 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20794 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20795 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20796 ins_encode %{
20797 int vlen_enc = vector_length_encoding(this);
20798 BasicType bt = Matcher::vector_element_basic_type(this);
20799 int opc = this->ideal_Opcode();
20800 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20801 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20802 %}
20803 ins_pipe( pipe_slow );
20804 %}
20805
20806 // --------------------------------- Signum/CopySign ---------------------------
20807
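// Java Math.signum returns +1.0/-1.0 for positive/negative inputs but returns
// +/-0.0 and NaN operands unchanged, hence the zero and one constants matched
// as explicit operands below.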
20808 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20809 match(Set dst (SignumF dst (Binary zero one)));
20810 effect(KILL cr);
20811 format %{ "signumF $dst, $dst" %}
20812 ins_encode %{
20813 int opcode = this->ideal_Opcode();
20814 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20815 %}
20816 ins_pipe( pipe_slow );
20817 %}
20818
20819 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20820 match(Set dst (SignumD dst (Binary zero one)));
20821 effect(KILL cr);
20822 format %{ "signumD $dst, $dst" %}
20823 ins_encode %{
20824 int opcode = this->ideal_Opcode();
20825 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20826 %}
20827 ins_pipe( pipe_slow );
20828 %}
20829
20830 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20831 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20832 match(Set dst (SignumVF src (Binary zero one)));
20833 match(Set dst (SignumVD src (Binary zero one)));
20834 effect(TEMP dst, TEMP xtmp1);
20835 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20836 ins_encode %{
20837 int opcode = this->ideal_Opcode();
20838 int vec_enc = vector_length_encoding(this);
20839 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20840 $xtmp1$$XMMRegister, vec_enc);
20841 %}
20842 ins_pipe( pipe_slow );
20843 %}
20844
20845 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20846 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20847 match(Set dst (SignumVF src (Binary zero one)));
20848 match(Set dst (SignumVD src (Binary zero one)));
20849 effect(TEMP dst, TEMP ktmp1);
20850 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20851 ins_encode %{
20852 int opcode = this->ideal_Opcode();
20853 int vec_enc = vector_length_encoding(this);
20854 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20855 $ktmp1$$KRegister, vec_enc);
20856 %}
20857 ins_pipe( pipe_slow );
20858 %}
20859
20860 // ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF (every bit except the sign bit)
// Wherever C is 0 (the sign bit), we want to pick the bit from B (the sign operand)
// Wherever C is 1, we want to pick the bit from A (the magnitude operand)
20866 //
20867 // A B C Result
20868 // 0 0 0 0
20869 // 0 0 1 0
20870 // 0 1 0 1
20871 // 0 1 1 0
20872 // 1 0 0 0
20873 // 1 0 1 1
20874 // 1 1 0 1
20875 // 1 1 1 1
20876 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
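// Worked example: A = +2.5f (0x40200000) as magnitude, B = -1.0f (0xBF800000)
// as sign. C = 0x7FFFFFFF selects A's low 31 bits (0x40200000) and B's sign
// bit (0x80000000), giving 0xC0200000 = -2.5f.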
20878 // ---------------------------------------
20879
20880 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20881 match(Set dst (CopySignF dst src));
20882 effect(TEMP tmp1, TEMP tmp2);
20883 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20884 ins_encode %{
20885 __ movl($tmp2$$Register, 0x7FFFFFFF);
20886 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20887 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20888 %}
20889 ins_pipe( pipe_slow );
20890 %}
20891
20892 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20893 match(Set dst (CopySignD dst (Binary src zero)));
20894 ins_cost(100);
20895 effect(TEMP tmp1, TEMP tmp2);
20896 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20897 ins_encode %{
20898 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20899 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20900 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20901 %}
20902 ins_pipe( pipe_slow );
20903 %}
20904
20905 //----------------------------- CompressBits/ExpandBits ------------------------
20906
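// BMI2 pext gathers the src bits selected by mask into contiguous low bits of
// dst; pdep is the inverse scatter. For example, pext(src=0b101101,
// mask=0b001110) picks src bits 1..3 (0,1,1) and packs them into 0b110, and
// pdep(0b110, mask=0b001110) scatters them back to 0b001100.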
20907 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20908 predicate(n->bottom_type()->isa_int());
20909 match(Set dst (CompressBits src mask));
20910 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20911 ins_encode %{
20912 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20913 %}
20914 ins_pipe( pipe_slow );
20915 %}
20916
20917 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20918 predicate(n->bottom_type()->isa_int());
20919 match(Set dst (ExpandBits src mask));
20920 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20921 ins_encode %{
20922 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20923 %}
20924 ins_pipe( pipe_slow );
20925 %}
20926
20927 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20928 predicate(n->bottom_type()->isa_int());
20929 match(Set dst (CompressBits src (LoadI mask)));
20930 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20931 ins_encode %{
20932 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20933 %}
20934 ins_pipe( pipe_slow );
20935 %}
20936
20937 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20938 predicate(n->bottom_type()->isa_int());
20939 match(Set dst (ExpandBits src (LoadI mask)));
20940 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20941 ins_encode %{
20942 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20943 %}
20944 ins_pipe( pipe_slow );
20945 %}
20946
20947 // --------------------------------- Sqrt --------------------------------------
20948
20949 instruct vsqrtF_reg(vec dst, vec src) %{
20950 match(Set dst (SqrtVF src));
20951 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
20952 ins_encode %{
20953 assert(UseAVX > 0, "required");
20954 int vlen_enc = vector_length_encoding(this);
20955 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20956 %}
20957 ins_pipe( pipe_slow );
20958 %}
20959
20960 instruct vsqrtF_mem(vec dst, memory mem) %{
20961 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20962 match(Set dst (SqrtVF (LoadVector mem)));
20963 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
20964 ins_encode %{
20965 assert(UseAVX > 0, "required");
20966 int vlen_enc = vector_length_encoding(this);
20967 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20968 %}
20969 ins_pipe( pipe_slow );
20970 %}
20971
20972 // Floating point vector sqrt
20973 instruct vsqrtD_reg(vec dst, vec src) %{
20974 match(Set dst (SqrtVD src));
20975 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
20976 ins_encode %{
20977 assert(UseAVX > 0, "required");
20978 int vlen_enc = vector_length_encoding(this);
20979 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20980 %}
20981 ins_pipe( pipe_slow );
20982 %}
20983
20984 instruct vsqrtD_mem(vec dst, memory mem) %{
20985 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20986 match(Set dst (SqrtVD (LoadVector mem)));
20987 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
20988 ins_encode %{
20989 assert(UseAVX > 0, "required");
20990 int vlen_enc = vector_length_encoding(this);
20991 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20992 %}
20993 ins_pipe( pipe_slow );
20994 %}
20995
20996 // ------------------------------ Shift ---------------------------------------
20997
// Left and right shift count vectors are the same on x86
// (only the low 64 bits of the xmm register are used for the count).
21000 instruct vshiftcnt(vec dst, rRegI cnt) %{
21001 match(Set dst (LShiftCntV cnt));
21002 match(Set dst (RShiftCntV cnt));
21003 format %{ "movdl $dst,$cnt\t! load shift count" %}
21004 ins_encode %{
21005 __ movdl($dst$$XMMRegister, $cnt$$Register);
21006 %}
21007 ins_pipe( pipe_slow );
21008 %}
21009
21010 // Byte vector shift
21011 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21012 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21013 match(Set dst ( LShiftVB src shift));
21014 match(Set dst ( RShiftVB src shift));
21015 match(Set dst (URShiftVB src shift));
21016 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21017 format %{"vector_byte_shift $dst,$src,$shift" %}
21018 ins_encode %{
21019 assert(UseSSE > 3, "required");
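    // There is no native byte shift: widen the bytes to words (vextendbw),
    // shift as words, mask each word result back to its low byte, then pack
    // the words back down to bytes.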
21020 int opcode = this->ideal_Opcode();
21021 bool sign = (opcode != Op_URShiftVB);
21022 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21023 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21024 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21025 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21026 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21027 %}
21028 ins_pipe( pipe_slow );
21029 %}
21030
21031 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21032 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21033 UseAVX <= 1);
21034 match(Set dst ( LShiftVB src shift));
21035 match(Set dst ( RShiftVB src shift));
21036 match(Set dst (URShiftVB src shift));
21037 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21038 format %{"vector_byte_shift $dst,$src,$shift" %}
21039 ins_encode %{
21040 assert(UseSSE > 3, "required");
21041 int opcode = this->ideal_Opcode();
21042 bool sign = (opcode != Op_URShiftVB);
21043 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21044 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21045 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21046 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21047 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21048 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21049 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21050 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21051 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21052 %}
21053 ins_pipe( pipe_slow );
21054 %}
21055
21056 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21057 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21058 UseAVX > 1);
21059 match(Set dst ( LShiftVB src shift));
21060 match(Set dst ( RShiftVB src shift));
21061 match(Set dst (URShiftVB src shift));
21062 effect(TEMP dst, TEMP tmp);
21063 format %{"vector_byte_shift $dst,$src,$shift" %}
21064 ins_encode %{
21065 int opcode = this->ideal_Opcode();
21066 bool sign = (opcode != Op_URShiftVB);
21067 int vlen_enc = Assembler::AVX_256bit;
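    // Widen all 16 bytes to words in a ymm register, shift as words, mask to
    // the low byte of each word, then pack the high and low 128-bit halves
    // back into the xmm result.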
21068 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21069 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21070 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21071 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21072 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21073 %}
21074 ins_pipe( pipe_slow );
21075 %}
21076
21077 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21078 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21079 match(Set dst ( LShiftVB src shift));
21080 match(Set dst ( RShiftVB src shift));
21081 match(Set dst (URShiftVB src shift));
21082 effect(TEMP dst, TEMP tmp);
21083 format %{"vector_byte_shift $dst,$src,$shift" %}
21084 ins_encode %{
21085 assert(UseAVX > 1, "required");
21086 int opcode = this->ideal_Opcode();
21087 bool sign = (opcode != Op_URShiftVB);
21088 int vlen_enc = Assembler::AVX_256bit;
21089 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21090 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21091 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21092 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21093 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21094 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21095 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21096 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21097 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21098 %}
21099 ins_pipe( pipe_slow );
21100 %}
21101
21102 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21103 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21104 match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
21106 match(Set dst (URShiftVB src shift));
21107 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21108 format %{"vector_byte_shift $dst,$src,$shift" %}
21109 ins_encode %{
21110 assert(UseAVX > 2, "required");
21111 int opcode = this->ideal_Opcode();
21112 bool sign = (opcode != Op_URShiftVB);
21113 int vlen_enc = Assembler::AVX_512bit;
21114 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21115 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21116 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21117 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21118 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21119 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21120 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21121 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21122 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21123 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21124 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21125 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21126 %}
21127 ins_pipe( pipe_slow );
21128 %}
21129
// A logical right shift of a short vector would produce an incorrect Java
// result for negative data, because Java code converts a short value to an
// int (with sign extension) before shifting. Char vectors are fine, since
// chars are unsigned values.
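// For example, with s = (short)-1, Java evaluates (short)(s >>> 2) on the
// sign-extended int 0xFFFFFFFF, yielding 0x3FFFFFFF which narrows back to
// 0xFFFF (-1), whereas a 16-bit psrlw on 0xFFFF would produce 0x3FFF.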
// Shorts/Chars vector shift
21135 instruct vshiftS(vec dst, vec src, vec shift) %{
21136 predicate(!n->as_ShiftV()->is_var_shift());
21137 match(Set dst ( LShiftVS src shift));
21138 match(Set dst ( RShiftVS src shift));
21139 match(Set dst (URShiftVS src shift));
21140 effect(TEMP dst, USE src, USE shift);
21141 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21142 ins_encode %{
21143 int opcode = this->ideal_Opcode();
21144 if (UseAVX > 0) {
21145 int vlen_enc = vector_length_encoding(this);
21146 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21147 } else {
21148 int vlen = Matcher::vector_length(this);
21149 if (vlen == 2) {
21150 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21151 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21152 } else if (vlen == 4) {
21153 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21154 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21155 } else {
21156 assert (vlen == 8, "sanity");
21157 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21158 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21159 }
21160 }
21161 %}
21162 ins_pipe( pipe_slow );
21163 %}
21164
// Integers vector shift
21166 instruct vshiftI(vec dst, vec src, vec shift) %{
21167 predicate(!n->as_ShiftV()->is_var_shift());
21168 match(Set dst ( LShiftVI src shift));
21169 match(Set dst ( RShiftVI src shift));
21170 match(Set dst (URShiftVI src shift));
21171 effect(TEMP dst, USE src, USE shift);
21172 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21173 ins_encode %{
21174 int opcode = this->ideal_Opcode();
21175 if (UseAVX > 0) {
21176 int vlen_enc = vector_length_encoding(this);
21177 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21178 } else {
21179 int vlen = Matcher::vector_length(this);
21180 if (vlen == 2) {
21181 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21182 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21183 } else {
21184 assert(vlen == 4, "sanity");
21185 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21186 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21187 }
21188 }
21189 %}
21190 ins_pipe( pipe_slow );
21191 %}
21192
// Integers vector constant shift
21194 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21195 match(Set dst (LShiftVI src (LShiftCntV shift)));
21196 match(Set dst (RShiftVI src (RShiftCntV shift)));
21197 match(Set dst (URShiftVI src (RShiftCntV shift)));
21198 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21199 ins_encode %{
21200 int opcode = this->ideal_Opcode();
21201 if (UseAVX > 0) {
21202 int vector_len = vector_length_encoding(this);
21203 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21204 } else {
21205 int vlen = Matcher::vector_length(this);
21206 if (vlen == 2) {
21207 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21208 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21209 } else {
21210 assert(vlen == 4, "sanity");
21211 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21212 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21213 }
21214 }
21215 %}
21216 ins_pipe( pipe_slow );
21217 %}
21218
21219 // Longs vector shift
21220 instruct vshiftL(vec dst, vec src, vec shift) %{
21221 predicate(!n->as_ShiftV()->is_var_shift());
21222 match(Set dst ( LShiftVL src shift));
21223 match(Set dst (URShiftVL src shift));
21224 effect(TEMP dst, USE src, USE shift);
21225 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21226 ins_encode %{
21227 int opcode = this->ideal_Opcode();
21228 if (UseAVX > 0) {
21229 int vlen_enc = vector_length_encoding(this);
21230 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21231 } else {
21232 assert(Matcher::vector_length(this) == 2, "");
21233 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21234 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21235 }
21236 %}
21237 ins_pipe( pipe_slow );
21238 %}
21239
21240 // Longs vector constant shift
21241 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21242 match(Set dst (LShiftVL src (LShiftCntV shift)));
21243 match(Set dst (URShiftVL src (RShiftCntV shift)));
21244 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21245 ins_encode %{
21246 int opcode = this->ideal_Opcode();
21247 if (UseAVX > 0) {
21248 int vector_len = vector_length_encoding(this);
21249 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21250 } else {
21251 assert(Matcher::vector_length(this) == 2, "");
21252 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21253 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21254 }
21255 %}
21256 ins_pipe( pipe_slow );
21257 %}
21258
21259 // -------------------ArithmeticRightShift -----------------------------------
21260 // Long vector arithmetic right shift
21261 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21262 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21263 match(Set dst (RShiftVL src shift));
21264 effect(TEMP dst, TEMP tmp);
21265 format %{ "vshiftq $dst,$src,$shift" %}
21266 ins_encode %{
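    // SSE2/AVX2 have no 64-bit arithmetic right shift; it is emulated via the
    // identity sra(x, n) == (srl(x, n) ^ m) - m, where m = srl(sign_mask, n)
    // carries the original sign bit at position 63 - n.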
21267 uint vlen = Matcher::vector_length(this);
21268 if (vlen == 2) {
21269 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21270 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21271 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21272 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21273 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21274 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21275 } else {
21276 assert(vlen == 4, "sanity");
21277 assert(UseAVX > 1, "required");
21278 int vlen_enc = Assembler::AVX_256bit;
21279 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21280 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21281 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21282 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21283 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21284 }
21285 %}
21286 ins_pipe( pipe_slow );
21287 %}
21288
21289 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21290 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21291 match(Set dst (RShiftVL src shift));
21292 format %{ "vshiftq $dst,$src,$shift" %}
21293 ins_encode %{
21294 int vlen_enc = vector_length_encoding(this);
21295 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21296 %}
21297 ins_pipe( pipe_slow );
21298 %}
21299
21300 // ------------------- Variable Shift -----------------------------
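// In a variable shift (is_var_shift), each lane is shifted by the count held
// in the corresponding lane of the shift vector, unlike the instructions
// above, where a single broadcast count applies to every lane.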
21301 // Byte variable shift
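// x86 has no per-lane byte shifts at any vector length. Each group of bytes
// is therefore widened to 16-bit lanes, shifted while widened (varshiftbw),
// and the word results are narrowed back to bytes with vpackuswb.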
21302 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21303 predicate(Matcher::vector_length(n) <= 8 &&
21304 n->as_ShiftV()->is_var_shift() &&
21305 !VM_Version::supports_avx512bw());
21306 match(Set dst ( LShiftVB src shift));
21307 match(Set dst ( RShiftVB src shift));
21308 match(Set dst (URShiftVB src shift));
21309 effect(TEMP dst, TEMP vtmp);
21310 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21311 ins_encode %{
21312 assert(UseAVX >= 2, "required");
21313
21314 int opcode = this->ideal_Opcode();
21315 int vlen_enc = Assembler::AVX_128bit;
21316 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21317 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21318 %}
21319 ins_pipe( pipe_slow );
21320 %}
21321
21322 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21323 predicate(Matcher::vector_length(n) == 16 &&
21324 n->as_ShiftV()->is_var_shift() &&
21325 !VM_Version::supports_avx512bw());
21326 match(Set dst ( LShiftVB src shift));
21327 match(Set dst ( RShiftVB src shift));
21328 match(Set dst (URShiftVB src shift));
21329 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21330 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21331 ins_encode %{
21332 assert(UseAVX >= 2, "required");
21333
21334 int opcode = this->ideal_Opcode();
21335 int vlen_enc = Assembler::AVX_128bit;
21336 // Shift lower half and get word result in dst
21337 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21338
21339 // Shift upper half and get word result in vtmp1
21340 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21341 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21342 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21343
21344 // Merge and down convert the two word results to byte in dst
21345 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21346 %}
21347 ins_pipe( pipe_slow );
21348 %}
21349
21350 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21351 predicate(Matcher::vector_length(n) == 32 &&
21352 n->as_ShiftV()->is_var_shift() &&
21353 !VM_Version::supports_avx512bw());
21354 match(Set dst ( LShiftVB src shift));
21355 match(Set dst ( RShiftVB src shift));
21356 match(Set dst (URShiftVB src shift));
21357 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21359 ins_encode %{
21360 assert(UseAVX >= 2, "required");
21361
21362 int opcode = this->ideal_Opcode();
21363 int vlen_enc = Assembler::AVX_128bit;
21364 // Process lower 128 bits and get result in dst
21365 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21366 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21367 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21368 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21369 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21370
21371 // Process higher 128 bits and get result in vtmp3
21372 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21373 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21374 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21375 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21376 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21377 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21378 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21379
21380 // Merge the two results in dst
21381 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21382 %}
21383 ins_pipe( pipe_slow );
21384 %}
21385
21386 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21387 predicate(Matcher::vector_length(n) <= 32 &&
21388 n->as_ShiftV()->is_var_shift() &&
21389 VM_Version::supports_avx512bw());
21390 match(Set dst ( LShiftVB src shift));
21391 match(Set dst ( RShiftVB src shift));
21392 match(Set dst (URShiftVB src shift));
21393 effect(TEMP dst, TEMP vtmp);
21394 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21395 ins_encode %{
21396 assert(UseAVX > 2, "required");
21397
21398 int opcode = this->ideal_Opcode();
21399 int vlen_enc = vector_length_encoding(this);
21400 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21401 %}
21402 ins_pipe( pipe_slow );
21403 %}
21404
21405 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21406 predicate(Matcher::vector_length(n) == 64 &&
21407 n->as_ShiftV()->is_var_shift() &&
21408 VM_Version::supports_avx512bw());
21409 match(Set dst ( LShiftVB src shift));
21410 match(Set dst ( RShiftVB src shift));
21411 match(Set dst (URShiftVB src shift));
21412 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21413 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21414 ins_encode %{
21415 assert(UseAVX > 2, "required");
21416
21417 int opcode = this->ideal_Opcode();
21418 int vlen_enc = Assembler::AVX_256bit;
21419 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21420 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21421 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21422 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21423 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21424 %}
21425 ins_pipe( pipe_slow );
21426 %}
21427
21428 // Short variable shift
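// Without AVX512BW there are no variable per-word shifts (vpsllvw and
// friends), so shorts are widened to dwords, shifted with the AVX2 variable
// dword shifts (varshiftd), masked back down to 16 significant bits and
// re-packed with vpackusdw.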
21429 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21430 predicate(Matcher::vector_length(n) <= 8 &&
21431 n->as_ShiftV()->is_var_shift() &&
21432 !VM_Version::supports_avx512bw());
21433 match(Set dst ( LShiftVS src shift));
21434 match(Set dst ( RShiftVS src shift));
21435 match(Set dst (URShiftVS src shift));
21436 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21438 ins_encode %{
21439 assert(UseAVX >= 2, "required");
21440
21441 int opcode = this->ideal_Opcode();
21442 bool sign = (opcode != Op_URShiftVS);
21443 int vlen_enc = Assembler::AVX_256bit;
21444 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21445 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21446 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21447 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21448 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21449 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21450 %}
21451 ins_pipe( pipe_slow );
21452 %}
21453
21454 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21455 predicate(Matcher::vector_length(n) == 16 &&
21456 n->as_ShiftV()->is_var_shift() &&
21457 !VM_Version::supports_avx512bw());
21458 match(Set dst ( LShiftVS src shift));
21459 match(Set dst ( RShiftVS src shift));
21460 match(Set dst (URShiftVS src shift));
21461 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21463 ins_encode %{
21464 assert(UseAVX >= 2, "required");
21465
21466 int opcode = this->ideal_Opcode();
21467 bool sign = (opcode != Op_URShiftVS);
21468 int vlen_enc = Assembler::AVX_256bit;
21469 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21470 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21471 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21472 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21473 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21474
21475 // Shift upper half, with result in dst using vtmp1 as TEMP
21476 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21477 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21478 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21479 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21480 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21481 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21482
21483 // Merge lower and upper half result into dst
21484 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21485 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21486 %}
21487 ins_pipe( pipe_slow );
21488 %}
21489
21490 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21491 predicate(n->as_ShiftV()->is_var_shift() &&
21492 VM_Version::supports_avx512bw());
21493 match(Set dst ( LShiftVS src shift));
21494 match(Set dst ( RShiftVS src shift));
21495 match(Set dst (URShiftVS src shift));
21496 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21497 ins_encode %{
21498 assert(UseAVX > 2, "required");
21499
21500 int opcode = this->ideal_Opcode();
21501 int vlen_enc = vector_length_encoding(this);
21502 if (!VM_Version::supports_avx512vl()) {
21503 vlen_enc = Assembler::AVX_512bit;
21504 }
21505 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21506 %}
21507 ins_pipe( pipe_slow );
21508 %}
21509
// Integer variable shift
21511 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21512 predicate(n->as_ShiftV()->is_var_shift());
21513 match(Set dst ( LShiftVI src shift));
21514 match(Set dst ( RShiftVI src shift));
21515 match(Set dst (URShiftVI src shift));
21516 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21517 ins_encode %{
21518 assert(UseAVX >= 2, "required");
21519
21520 int opcode = this->ideal_Opcode();
21521 int vlen_enc = vector_length_encoding(this);
21522 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21523 %}
21524 ins_pipe( pipe_slow );
21525 %}
21526
// Long variable shift
21528 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21529 predicate(n->as_ShiftV()->is_var_shift());
21530 match(Set dst ( LShiftVL src shift));
21531 match(Set dst (URShiftVL src shift));
21532 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21533 ins_encode %{
21534 assert(UseAVX >= 2, "required");
21535
21536 int opcode = this->ideal_Opcode();
21537 int vlen_enc = vector_length_encoding(this);
21538 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21539 %}
21540 ins_pipe( pipe_slow );
21541 %}
21542
// Long variable arithmetic right shift
21544 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21545 predicate(Matcher::vector_length(n) <= 4 &&
21546 n->as_ShiftV()->is_var_shift() &&
21547 UseAVX == 2);
21548 match(Set dst (RShiftVL src shift));
21549 effect(TEMP dst, TEMP vtmp);
21550 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21551 ins_encode %{
21552 int opcode = this->ideal_Opcode();
21553 int vlen_enc = vector_length_encoding(this);
21554 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21555 $vtmp$$XMMRegister);
21556 %}
21557 ins_pipe( pipe_slow );
21558 %}
21559
21560 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21561 predicate(n->as_ShiftV()->is_var_shift() &&
21562 UseAVX > 2);
21563 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21565 ins_encode %{
21566 int opcode = this->ideal_Opcode();
21567 int vlen_enc = vector_length_encoding(this);
21568 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21569 %}
21570 ins_pipe( pipe_slow );
21571 %}
21572
21573 // --------------------------------- AND --------------------------------------
21574
21575 instruct vand(vec dst, vec src) %{
21576 predicate(UseAVX == 0);
21577 match(Set dst (AndV dst src));
21578 format %{ "pand $dst,$src\t! and vectors" %}
21579 ins_encode %{
21580 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21581 %}
21582 ins_pipe( pipe_slow );
21583 %}
21584
21585 instruct vand_reg(vec dst, vec src1, vec src2) %{
21586 predicate(UseAVX > 0);
21587 match(Set dst (AndV src1 src2));
21588 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21589 ins_encode %{
21590 int vlen_enc = vector_length_encoding(this);
21591 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21592 %}
21593 ins_pipe( pipe_slow );
21594 %}
21595
21596 instruct vand_mem(vec dst, vec src, memory mem) %{
21597 predicate((UseAVX > 0) &&
21598 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21599 match(Set dst (AndV src (LoadVector mem)));
21600 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21601 ins_encode %{
21602 int vlen_enc = vector_length_encoding(this);
21603 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21604 %}
21605 ins_pipe( pipe_slow );
21606 %}
21607
21608 // --------------------------------- OR ---------------------------------------
21609
21610 instruct vor(vec dst, vec src) %{
21611 predicate(UseAVX == 0);
21612 match(Set dst (OrV dst src));
21613 format %{ "por $dst,$src\t! or vectors" %}
21614 ins_encode %{
21615 __ por($dst$$XMMRegister, $src$$XMMRegister);
21616 %}
21617 ins_pipe( pipe_slow );
21618 %}
21619
21620 instruct vor_reg(vec dst, vec src1, vec src2) %{
21621 predicate(UseAVX > 0);
21622 match(Set dst (OrV src1 src2));
21623 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21624 ins_encode %{
21625 int vlen_enc = vector_length_encoding(this);
21626 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21627 %}
21628 ins_pipe( pipe_slow );
21629 %}
21630
21631 instruct vor_mem(vec dst, vec src, memory mem) %{
21632 predicate((UseAVX > 0) &&
21633 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21634 match(Set dst (OrV src (LoadVector mem)));
21635 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21636 ins_encode %{
21637 int vlen_enc = vector_length_encoding(this);
21638 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21639 %}
21640 ins_pipe( pipe_slow );
21641 %}
21642
21643 // --------------------------------- XOR --------------------------------------
21644
21645 instruct vxor(vec dst, vec src) %{
21646 predicate(UseAVX == 0);
21647 match(Set dst (XorV dst src));
21648 format %{ "pxor $dst,$src\t! xor vectors" %}
21649 ins_encode %{
21650 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21651 %}
21652 ins_pipe( pipe_slow );
21653 %}
21654
21655 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21656 predicate(UseAVX > 0);
21657 match(Set dst (XorV src1 src2));
21658 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21659 ins_encode %{
21660 int vlen_enc = vector_length_encoding(this);
21661 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21662 %}
21663 ins_pipe( pipe_slow );
21664 %}
21665
21666 instruct vxor_mem(vec dst, vec src, memory mem) %{
21667 predicate((UseAVX > 0) &&
21668 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21669 match(Set dst (XorV src (LoadVector mem)));
21670 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21671 ins_encode %{
21672 int vlen_enc = vector_length_encoding(this);
21673 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21674 %}
21675 ins_pipe( pipe_slow );
21676 %}
21677
21678 // --------------------------------- VectorCast --------------------------------------
21679
21680 instruct vcastBtoX(vec dst, vec src) %{
21681 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21682 match(Set dst (VectorCastB2X src));
21683 format %{ "vector_cast_b2x $dst,$src\t!" %}
21684 ins_encode %{
21685 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21686 int vlen_enc = vector_length_encoding(this);
21687 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21688 %}
21689 ins_pipe( pipe_slow );
21690 %}
21691
21692 instruct vcastBtoD(legVec dst, legVec src) %{
21693 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21694 match(Set dst (VectorCastB2X src));
21695 format %{ "vector_cast_b2x $dst,$src\t!" %}
21696 ins_encode %{
21697 int vlen_enc = vector_length_encoding(this);
21698 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21699 %}
21700 ins_pipe( pipe_slow );
21701 %}
21702
21703 instruct castStoX(vec dst, vec src) %{
21704 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21705 Matcher::vector_length(n->in(1)) <= 8 && // src
21706 Matcher::vector_element_basic_type(n) == T_BYTE);
21707 match(Set dst (VectorCastS2X src));
21708 format %{ "vector_cast_s2x $dst,$src" %}
21709 ins_encode %{
21710 assert(UseAVX > 0, "required");
21711
21712 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21713 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21714 %}
21715 ins_pipe( pipe_slow );
21716 %}
21717
21718 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21719 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21720 Matcher::vector_length(n->in(1)) == 16 && // src
21721 Matcher::vector_element_basic_type(n) == T_BYTE);
21722 effect(TEMP dst, TEMP vtmp);
21723 match(Set dst (VectorCastS2X src));
21724 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21725 ins_encode %{
21726 assert(UseAVX > 0, "required");
21727
21728 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21729 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21730 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21731 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21732 %}
21733 ins_pipe( pipe_slow );
21734 %}
21735
21736 instruct vcastStoX_evex(vec dst, vec src) %{
21737 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21738 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21739 match(Set dst (VectorCastS2X src));
21740 format %{ "vector_cast_s2x $dst,$src\t!" %}
21741 ins_encode %{
21742 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21743 int src_vlen_enc = vector_length_encoding(this, $src);
21744 int vlen_enc = vector_length_encoding(this);
21745 switch (to_elem_bt) {
21746 case T_BYTE:
21747 if (!VM_Version::supports_avx512vl()) {
21748 vlen_enc = Assembler::AVX_512bit;
21749 }
21750 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21751 break;
21752 case T_INT:
21753 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21754 break;
21755 case T_FLOAT:
21756 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21757 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21758 break;
21759 case T_LONG:
21760 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21761 break;
21762 case T_DOUBLE: {
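        // A short-to-double cast grows each element 4x, so the intermediate
        // dword vector needs only half the width of the final double vector.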
21763 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21764 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21765 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21766 break;
21767 }
21768 default:
21769 ShouldNotReachHere();
21770 }
21771 %}
21772 ins_pipe( pipe_slow );
21773 %}
21774
21775 instruct castItoX(vec dst, vec src) %{
21776 predicate(UseAVX <= 2 &&
21777 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21778 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21779 match(Set dst (VectorCastI2X src));
21780 format %{ "vector_cast_i2x $dst,$src" %}
21781 ins_encode %{
21782 assert(UseAVX > 0, "required");
21783
21784 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21785 int vlen_enc = vector_length_encoding(this, $src);
21786
21787 if (to_elem_bt == T_BYTE) {
21788 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21789 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21790 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21791 } else {
21792 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21793 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21794 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21795 }
21796 %}
21797 ins_pipe( pipe_slow );
21798 %}
21799
21800 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21801 predicate(UseAVX <= 2 &&
21802 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21803 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21804 match(Set dst (VectorCastI2X src));
21805 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21806 effect(TEMP dst, TEMP vtmp);
21807 ins_encode %{
21808 assert(UseAVX > 0, "required");
21809
21810 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21811 int vlen_enc = vector_length_encoding(this, $src);
21812
21813 if (to_elem_bt == T_BYTE) {
21814 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21815 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21816 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21817 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21818 } else {
21819 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21820 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21821 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21822 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21823 }
21824 %}
21825 ins_pipe( pipe_slow );
21826 %}
21827
21828 instruct vcastItoX_evex(vec dst, vec src) %{
21829 predicate(UseAVX > 2 ||
21830 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21831 match(Set dst (VectorCastI2X src));
21832 format %{ "vector_cast_i2x $dst,$src\t!" %}
21833 ins_encode %{
21834 assert(UseAVX > 0, "required");
21835
21836 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21837 int src_vlen_enc = vector_length_encoding(this, $src);
21838 int dst_vlen_enc = vector_length_encoding(this);
21839 switch (dst_elem_bt) {
21840 case T_BYTE:
21841 if (!VM_Version::supports_avx512vl()) {
21842 src_vlen_enc = Assembler::AVX_512bit;
21843 }
21844 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21845 break;
21846 case T_SHORT:
21847 if (!VM_Version::supports_avx512vl()) {
21848 src_vlen_enc = Assembler::AVX_512bit;
21849 }
21850 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21851 break;
21852 case T_FLOAT:
21853 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21854 break;
21855 case T_LONG:
21856 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21857 break;
21858 case T_DOUBLE:
21859 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21860 break;
21861 default:
21862 ShouldNotReachHere();
21863 }
21864 %}
21865 ins_pipe( pipe_slow );
21866 %}
21867
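// Long-to-byte/short downcast without AVX-512: only the low dword of each
// long can survive the narrowing, so those dwords are gathered into the low
// half of the register (vpshufd for 128-bit inputs; vpermilps plus vpermpd
// for 256-bit), masked to the target width and packed.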
21868 instruct vcastLtoBS(vec dst, vec src) %{
21869 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21870 UseAVX <= 2);
21871 match(Set dst (VectorCastL2X src));
21872 format %{ "vector_cast_l2x $dst,$src" %}
21873 ins_encode %{
21874 assert(UseAVX > 0, "required");
21875
21876 int vlen = Matcher::vector_length_in_bytes(this, $src);
21877 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21878 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21879 : ExternalAddress(vector_int_to_short_mask());
21880 if (vlen <= 16) {
21881 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21882 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21883 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21884 } else {
21885 assert(vlen <= 32, "required");
21886 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21887 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21888 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21889 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21890 }
21891 if (to_elem_bt == T_BYTE) {
21892 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21893 }
21894 %}
21895 ins_pipe( pipe_slow );
21896 %}
21897
21898 instruct vcastLtoX_evex(vec dst, vec src) %{
21899 predicate(UseAVX > 2 ||
21900 (Matcher::vector_element_basic_type(n) == T_INT ||
21901 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21902 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21903 match(Set dst (VectorCastL2X src));
21904 format %{ "vector_cast_l2x $dst,$src\t!" %}
21905 ins_encode %{
21906 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21907 int vlen = Matcher::vector_length_in_bytes(this, $src);
21908 int vlen_enc = vector_length_encoding(this, $src);
21909 switch (to_elem_bt) {
21910 case T_BYTE:
21911 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21912 vlen_enc = Assembler::AVX_512bit;
21913 }
21914 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21915 break;
21916 case T_SHORT:
21917 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21918 vlen_enc = Assembler::AVX_512bit;
21919 }
21920 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21921 break;
21922 case T_INT:
21923 if (vlen == 8) {
21924 if ($dst$$XMMRegister != $src$$XMMRegister) {
21925 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21926 }
21927 } else if (vlen == 16) {
21928 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21929 } else if (vlen == 32) {
21930 if (UseAVX > 2) {
21931 if (!VM_Version::supports_avx512vl()) {
21932 vlen_enc = Assembler::AVX_512bit;
21933 }
21934 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21935 } else {
21936 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21937 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21938 }
21939 } else { // vlen == 64
21940 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21941 }
21942 break;
21943 case T_FLOAT:
21944 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21945 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21946 break;
21947 case T_DOUBLE:
21948 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21949 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21950 break;
21951
21952 default: assert(false, "%s", type2name(to_elem_bt));
21953 }
21954 %}
21955 ins_pipe( pipe_slow );
21956 %}
21957
21958 instruct vcastFtoD_reg(vec dst, vec src) %{
21959 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21960 match(Set dst (VectorCastF2X src));
21961 format %{ "vector_cast_f2d $dst,$src\t!" %}
21962 ins_encode %{
21963 int vlen_enc = vector_length_encoding(this);
21964 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21965 %}
21966 ins_pipe( pipe_slow );
21967 %}
21968
21970 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21971 predicate(!VM_Version::supports_avx10_2() &&
21972 !VM_Version::supports_avx512vl() &&
21973 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21974 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21975 is_integral_type(Matcher::vector_element_basic_type(n)));
21976 match(Set dst (VectorCastF2X src));
21977 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21978 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21979 ins_encode %{
21980 int vlen_enc = vector_length_encoding(this, $src);
21981 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits for the register-indirect addressing mode: stub
    // constants live in the code cache, and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G is unreasonable in practice. On the flip side, the cap lets us skip
    // allocating a temporary register, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
21989 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21990 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21991 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21992 %}
21993 ins_pipe( pipe_slow );
21994 %}
21995
21996 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21997 predicate(!VM_Version::supports_avx10_2() &&
21998 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21999 is_integral_type(Matcher::vector_element_basic_type(n)));
22000 match(Set dst (VectorCastF2X src));
22001 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22002 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22003 ins_encode %{
22004 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22005 if (to_elem_bt == T_LONG) {
22006 int vlen_enc = vector_length_encoding(this);
22007 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22008 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22009 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22010 } else {
22011 int vlen_enc = vector_length_encoding(this, $src);
22012 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22013 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22014 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22015 }
22016 %}
22017 ins_pipe( pipe_slow );
22018 %}
22019
22020 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22021 predicate(VM_Version::supports_avx10_2() &&
22022 is_integral_type(Matcher::vector_element_basic_type(n)));
22023 match(Set dst (VectorCastF2X src));
22024 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22025 ins_encode %{
22026 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22027 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22028 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22029 %}
22030 ins_pipe( pipe_slow );
22031 %}
22032
22033 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22034 predicate(VM_Version::supports_avx10_2() &&
22035 is_integral_type(Matcher::vector_element_basic_type(n)));
22036 match(Set dst (VectorCastF2X (LoadVector src)));
22037 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22038 ins_encode %{
22039 int vlen = Matcher::vector_length(this);
22040 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22041 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22042 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22043 %}
22044 ins_pipe( pipe_slow );
22045 %}
22046
22047 instruct vcastDtoF_reg(vec dst, vec src) %{
22048 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22049 match(Set dst (VectorCastD2X src));
22050 format %{ "vector_cast_d2x $dst,$src\t!" %}
22051 ins_encode %{
22052 int vlen_enc = vector_length_encoding(this, $src);
22053 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22054 %}
22055 ins_pipe( pipe_slow );
22056 %}
22057
22058 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22059 predicate(!VM_Version::supports_avx10_2() &&
22060 !VM_Version::supports_avx512vl() &&
22061 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22062 is_integral_type(Matcher::vector_element_basic_type(n)));
22063 match(Set dst (VectorCastD2X src));
22064 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22065 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22066 ins_encode %{
22067 int vlen_enc = vector_length_encoding(this, $src);
22068 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22069 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22070 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22071 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22072 %}
22073 ins_pipe( pipe_slow );
22074 %}
22075
22076 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22077 predicate(!VM_Version::supports_avx10_2() &&
22078 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22079 is_integral_type(Matcher::vector_element_basic_type(n)));
22080 match(Set dst (VectorCastD2X src));
22081 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22082 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22083 ins_encode %{
22084 int vlen_enc = vector_length_encoding(this, $src);
22085 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22086 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22087 ExternalAddress(vector_float_signflip());
22088 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22089 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22090 %}
22091 ins_pipe( pipe_slow );
22092 %}
22093
22094 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22095 predicate(VM_Version::supports_avx10_2() &&
22096 is_integral_type(Matcher::vector_element_basic_type(n)));
22097 match(Set dst (VectorCastD2X src));
22098 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22099 ins_encode %{
22100 int vlen_enc = vector_length_encoding(this, $src);
22101 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22102 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22103 %}
22104 ins_pipe( pipe_slow );
22105 %}
22106
22107 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22108 predicate(VM_Version::supports_avx10_2() &&
22109 is_integral_type(Matcher::vector_element_basic_type(n)));
22110 match(Set dst (VectorCastD2X (LoadVector src)));
22111 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22112 ins_encode %{
22113 int vlen = Matcher::vector_length(this);
22114 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22115 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22116 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22117 %}
22118 ins_pipe( pipe_slow );
22119 %}
22120
22121 instruct vucast(vec dst, vec src) %{
22122 match(Set dst (VectorUCastB2X src));
22123 match(Set dst (VectorUCastS2X src));
22124 match(Set dst (VectorUCastI2X src));
22125 format %{ "vector_ucast $dst,$src\t!" %}
22126 ins_encode %{
22127 assert(UseAVX > 0, "required");
22128
22129 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22130 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22131 int vlen_enc = vector_length_encoding(this);
22132 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22133 %}
22134 ins_pipe( pipe_slow );
22135 %}
22136
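// RoundVF/RoundVD implement Math.round as floor(v + 0.5). The new_mxcsr
// constant selects round-toward-negative-infinity with all FP exceptions
// masked (0x3FBF for E-cores additionally pre-sets the sticky exception
// flag bits, presumably to avoid the cost of the first flag-setting update);
// the round helpers switch MXCSR to it around the conversion and restore the
// standard MXCSR afterwards.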
22137 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22138 predicate(!VM_Version::supports_avx512vl() &&
22139 Matcher::vector_length_in_bytes(n) < 64 &&
22140 Matcher::vector_element_basic_type(n) == T_INT);
22141 match(Set dst (RoundVF src));
22142 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22143 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22144 ins_encode %{
22145 int vlen_enc = vector_length_encoding(this);
22146 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22147 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22148 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22149 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22150 %}
22151 ins_pipe( pipe_slow );
22152 %}
22153
22154 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22155 predicate((VM_Version::supports_avx512vl() ||
22156 Matcher::vector_length_in_bytes(n) == 64) &&
22157 Matcher::vector_element_basic_type(n) == T_INT);
22158 match(Set dst (RoundVF src));
22159 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22160 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22161 ins_encode %{
22162 int vlen_enc = vector_length_encoding(this);
22163 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22164 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22165 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22166 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22167 %}
22168 ins_pipe( pipe_slow );
22169 %}
22170
22171 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22172 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22173 match(Set dst (RoundVD src));
22174 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22175 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22176 ins_encode %{
22177 int vlen_enc = vector_length_encoding(this);
22178 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22179 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22180 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22181 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22182 %}
22183 ins_pipe( pipe_slow );
22184 %}
22185
22186 // --------------------------------- VectorMaskCmp --------------------------------------
22187
22188 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22189 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22190 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22191 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22192 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22193 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22194 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22195 ins_encode %{
22196 int vlen_enc = vector_length_encoding(this, $src1);
22197 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22198 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22199 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22200 } else {
22201 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22202 }
22203 %}
22204 ins_pipe( pipe_slow );
22205 %}
22206
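// 512-bit compares produce results only in mask registers. When the IR
// expects a vector of lane-wide booleans instead, compare into ktmp and
// expand it by loading all-bits-set under that mask (zeroing the rest).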
22207 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22208 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22209 n->bottom_type()->isa_vectmask() == nullptr &&
22210 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22211 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22212 effect(TEMP ktmp);
22213 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22214 ins_encode %{
22215 int vlen_enc = Assembler::AVX_512bit;
22216 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22217 KRegister mask = k0; // The comparison itself is not being masked.
22218 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22219 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22220 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22221 } else {
22222 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22223 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22224 }
22225 %}
22226 ins_pipe( pipe_slow );
22227 %}
22228
22229 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22230 predicate(n->bottom_type()->isa_vectmask() &&
22231 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22232 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22233 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22234 ins_encode %{
22235 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22236 int vlen_enc = vector_length_encoding(this, $src1);
22237 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22238 KRegister mask = k0; // The comparison itself is not being masked.
22239 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22240 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22241 } else {
22242 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22243 }
22244 %}
22245 ins_pipe( pipe_slow );
22246 %}
22247
22248 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22249 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22250 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22251 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22252 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22253 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22254 (n->in(2)->get_int() == BoolTest::eq ||
22255 n->in(2)->get_int() == BoolTest::lt ||
22256 n->in(2)->get_int() == BoolTest::gt)); // cond
22257 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22258 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22259 ins_encode %{
22260 int vlen_enc = vector_length_encoding(this, $src1);
22261 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22262 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22263 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22264 %}
22265 ins_pipe( pipe_slow );
22266 %}
22267
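// SSE/AVX provide only eq and signed gt/lt vector compares, so ne/le/ge are
// computed as the complementary predicate with the result inverted; $xtmp
// holds the inversion constant inside vpcmpCCW.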
22268 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22269 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22270 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22271 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22272 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22273 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22274 (n->in(2)->get_int() == BoolTest::ne ||
22275 n->in(2)->get_int() == BoolTest::le ||
22276 n->in(2)->get_int() == BoolTest::ge)); // cond
22277 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22278 effect(TEMP dst, TEMP xtmp);
22279 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22280 ins_encode %{
22281 int vlen_enc = vector_length_encoding(this, $src1);
22282 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22283 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22284 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22285 %}
22286 ins_pipe( pipe_slow );
22287 %}
22288
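// Unsigned compares are emulated by flipping the sign bit of both operands
// and comparing signed:
//   x <u y  <==>  (x ^ SIGN_BIT) <s (y ^ SIGN_BIT)
// flip_bit holds SIGN_BIT replicated for the element width.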
22289 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22290 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22291 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22292 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22293 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22294 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22295 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22296 effect(TEMP dst, TEMP xtmp);
22297 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22298 ins_encode %{
22299 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22300 int vlen_enc = vector_length_encoding(this, $src1);
22301 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22302 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22303
22304 if (vlen_enc == Assembler::AVX_128bit) {
22305 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22306 } else {
22307 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22308 }
22309 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22310 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22311 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22312 %}
22313 ins_pipe( pipe_slow );
22314 %}
22315
22316 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22317 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22318 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22319 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22320 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22321 effect(TEMP ktmp);
22322 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22323 ins_encode %{
22324 assert(UseAVX > 2, "required");
22325
22326 int vlen_enc = vector_length_encoding(this, $src1);
22327 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22328 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22329 KRegister mask = k0; // The comparison itself is not being masked.
22330 bool merge = false;
22331 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22332
22333 switch (src1_elem_bt) {
22334 case T_INT: {
22335 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22336 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22337 break;
22338 }
22339 case T_LONG: {
22340 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22341 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22342 break;
22343 }
22344 default: assert(false, "%s", type2name(src1_elem_bt));
22345 }
22346 %}
22347 ins_pipe( pipe_slow );
22348 %}
22349
22351 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22352 predicate(n->bottom_type()->isa_vectmask() &&
22353 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22354 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22356 ins_encode %{
22357 assert(UseAVX > 2, "required");
22358 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22359
22360 int vlen_enc = vector_length_encoding(this, $src1);
22361 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22362 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22363 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22364
    // Compare lanes according to the source element type.
22366 switch (src1_elem_bt) {
22367 case T_BYTE: {
22368 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22369 break;
22370 }
22371 case T_SHORT: {
22372 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22373 break;
22374 }
22375 case T_INT: {
22376 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22377 break;
22378 }
22379 case T_LONG: {
22380 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22381 break;
22382 }
22383 default: assert(false, "%s", type2name(src1_elem_bt));
22384 }
22385 %}
22386 ins_pipe( pipe_slow );
22387 %}
22388
22389 // Extract
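// Extraction from vectors up to 128 bits wide uses pextr* directly; for
// wider vectors, the 128-bit lane holding the element is isolated first
// (get_lane) and the element is then extracted from that lane (get_elem).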
22390
22391 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22392 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22393 match(Set dst (ExtractI src idx));
22394 match(Set dst (ExtractS src idx));
22395 match(Set dst (ExtractB src idx));
22396 format %{ "extractI $dst,$src,$idx\t!" %}
22397 ins_encode %{
22398 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22399
22400 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22401 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22402 %}
22403 ins_pipe( pipe_slow );
22404 %}
22405
22406 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22407 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22408 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22409 match(Set dst (ExtractI src idx));
22410 match(Set dst (ExtractS src idx));
22411 match(Set dst (ExtractB src idx));
22412 effect(TEMP vtmp);
22413 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22414 ins_encode %{
22415 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22416
22417 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22418 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22419 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22420 %}
22421 ins_pipe( pipe_slow );
22422 %}
22423
22424 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22425 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22426 match(Set dst (ExtractL src idx));
22427 format %{ "extractL $dst,$src,$idx\t!" %}
22428 ins_encode %{
22429 assert(UseSSE >= 4, "required");
22430 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22431
22432 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22433 %}
22434 ins_pipe( pipe_slow );
22435 %}
22436
22437 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22438 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22439 Matcher::vector_length(n->in(1)) == 8); // src
22440 match(Set dst (ExtractL src idx));
22441 effect(TEMP vtmp);
22442 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22443 ins_encode %{
22444 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22445
22446 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22447 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22448 %}
22449 ins_pipe( pipe_slow );
22450 %}
22451
22452 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22453 predicate(Matcher::vector_length(n->in(1)) <= 4);
22454 match(Set dst (ExtractF src idx));
22455 effect(TEMP dst, TEMP vtmp);
22456 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22457 ins_encode %{
22458 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22459
22460 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22461 %}
22462 ins_pipe( pipe_slow );
22463 %}
22464
22465 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22466 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22467 Matcher::vector_length(n->in(1)/*src*/) == 16);
22468 match(Set dst (ExtractF src idx));
22469 effect(TEMP vtmp);
22470 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22471 ins_encode %{
22472 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22473
22474 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22475 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22476 %}
22477 ins_pipe( pipe_slow );
22478 %}
22479
22480 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22481 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22482 match(Set dst (ExtractD src idx));
22483 format %{ "extractD $dst,$src,$idx\t!" %}
22484 ins_encode %{
22485 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22486
22487 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22488 %}
22489 ins_pipe( pipe_slow );
22490 %}
22491
22492 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22493 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22494 Matcher::vector_length(n->in(1)) == 8); // src
22495 match(Set dst (ExtractD src idx));
22496 effect(TEMP vtmp);
22497 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22498 ins_encode %{
22499 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22500
22501 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22502 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22503 %}
22504 ins_pipe( pipe_slow );
22505 %}
22506
22507 // --------------------------------- Vector Blend --------------------------------------
22508
22509 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22510 predicate(UseAVX == 0);
22511 match(Set dst (VectorBlend (Binary dst src) mask));
22512 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22513 effect(TEMP tmp);
22514 ins_encode %{
22515 assert(UseSSE >= 4, "required");
22516
22517 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22518 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22519 }
22520 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22521 %}
22522 ins_pipe( pipe_slow );
22523 %}
22524
22525 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22526 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22527 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22528 Matcher::vector_length_in_bytes(n) <= 32 &&
22529 is_integral_type(Matcher::vector_element_basic_type(n)));
22530 match(Set dst (VectorBlend (Binary src1 src2) mask));
22531 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22532 ins_encode %{
22533 int vlen_enc = vector_length_encoding(this);
22534 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22535 %}
22536 ins_pipe( pipe_slow );
22537 %}
22538
22539 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22540 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22541 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22542 Matcher::vector_length_in_bytes(n) <= 32 &&
22543 !is_integral_type(Matcher::vector_element_basic_type(n)));
22544 match(Set dst (VectorBlend (Binary src1 src2) mask));
22545 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22546 ins_encode %{
22547 int vlen_enc = vector_length_encoding(this);
22548 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22549 %}
22550 ins_pipe( pipe_slow );
22551 %}
22552
22553 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22554 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22555 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22556 Matcher::vector_length_in_bytes(n) <= 32);
22557 match(Set dst (VectorBlend (Binary src1 src2) mask));
22558 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22559 effect(TEMP vtmp, TEMP dst);
22560 ins_encode %{
22561 int vlen_enc = vector_length_encoding(this);
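    // Emulate the blend with the bitwise select identity
    //   dst = (mask & src2) | (~mask & src1)
    // instead of vpblendvb; the EnableX86ECoreOpts predicate prefers the
    // three simple logic ops on E-core based systems.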
22562 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22563 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22564 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22565 %}
22566 ins_pipe( pipe_slow );
22567 %}
22568
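// For 512-bit vectors the boolean mask vector is first converted into an
// opmask: compare it for equality against the all-bits-set constant
// (producing $ktmp), then issue a merging evpblend under that opmask.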
22569 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22570 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22571 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22572 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22574 effect(TEMP ktmp);
22575 ins_encode %{
22576 int vlen_enc = Assembler::AVX_512bit;
22577 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22578 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22579 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22580 %}
22581 ins_pipe( pipe_slow );
22582 %}
22583
22584
22585 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22586 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22587 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22588 VM_Version::supports_avx512bw()));
22589 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22591 ins_encode %{
22592 int vlen_enc = vector_length_encoding(this);
22593 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22594 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22595 %}
22596 ins_pipe( pipe_slow );
22597 %}
22598
22599 // --------------------------------- ABS --------------------------------------
22600 // a = |a|
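// Each rule below dispatches on vector length: vectors of 128 bits or less
// use the SSSE3 pabs* forms, wider vectors the VEX/EVEX-encoded vpabs* forms.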
22601 instruct vabsB_reg(vec dst, vec src) %{
22602 match(Set dst (AbsVB src));
22603 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22604 ins_encode %{
22605 uint vlen = Matcher::vector_length(this);
22606 if (vlen <= 16) {
22607 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22608 } else {
22609 int vlen_enc = vector_length_encoding(this);
22610 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22611 }
22612 %}
22613 ins_pipe( pipe_slow );
22614 %}
22615
22616 instruct vabsS_reg(vec dst, vec src) %{
22617 match(Set dst (AbsVS src));
22618 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22619 ins_encode %{
22620 uint vlen = Matcher::vector_length(this);
22621 if (vlen <= 8) {
22622 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22623 } else {
22624 int vlen_enc = vector_length_encoding(this);
22625 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22626 }
22627 %}
22628 ins_pipe( pipe_slow );
22629 %}
22630
22631 instruct vabsI_reg(vec dst, vec src) %{
22632 match(Set dst (AbsVI src));
22633 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22634 ins_encode %{
22635 uint vlen = Matcher::vector_length(this);
22636 if (vlen <= 4) {
22637 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22638 } else {
22639 int vlen_enc = vector_length_encoding(this);
22640 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22641 }
22642 %}
22643 ins_pipe( pipe_slow );
22644 %}
22645
22646 instruct vabsL_reg(vec dst, vec src) %{
22647 match(Set dst (AbsVL src));
22648 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22649 ins_encode %{
22650 assert(UseAVX > 2, "required");
22651 int vlen_enc = vector_length_encoding(this);
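    // vpabsq is EVEX-only and its 128/256-bit forms require AVX512VL, so
    // without AVX512VL widen the encoding to 512 bits (the extra lanes are
    // don't-care).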
22652 if (!VM_Version::supports_avx512vl()) {
22653 vlen_enc = Assembler::AVX_512bit;
22654 }
22655 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22656 %}
22657 ins_pipe( pipe_slow );
22658 %}
22659
22660 // --------------------------------- ABSNEG --------------------------------------
22661
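// Float/double abs clears the sign bit (AND with a sign-bit-cleared mask) and
// neg flips it (XOR with a sign-bit-only mask); vabsnegf/vabsnegd select the
// operation and mask constant from the ideal opcode, so one rule serves both
// AbsV and NegV.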
22662 instruct vabsnegF(vec dst, vec src) %{
22663 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22664 match(Set dst (AbsVF src));
22665 match(Set dst (NegVF src));
22666 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22667 ins_cost(150);
22668 ins_encode %{
22669 int opcode = this->ideal_Opcode();
22670 int vlen = Matcher::vector_length(this);
22671 if (vlen == 2) {
22672 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22673 } else {
22674 assert(vlen == 8 || vlen == 16, "required");
22675 int vlen_enc = vector_length_encoding(this);
22676 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22677 }
22678 %}
22679 ins_pipe( pipe_slow );
22680 %}
22681
22682 instruct vabsneg4F(vec dst) %{
22683 predicate(Matcher::vector_length(n) == 4);
22684 match(Set dst (AbsVF dst));
22685 match(Set dst (NegVF dst));
22686 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22687 ins_cost(150);
22688 ins_encode %{
22689 int opcode = this->ideal_Opcode();
22690 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22691 %}
22692 ins_pipe( pipe_slow );
22693 %}
22694
22695 instruct vabsnegD(vec dst, vec src) %{
22696 match(Set dst (AbsVD src));
22697 match(Set dst (NegVD src));
22698 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22699 ins_encode %{
22700 int opcode = this->ideal_Opcode();
22701 uint vlen = Matcher::vector_length(this);
22702 if (vlen == 2) {
22703 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22704 } else {
22705 int vlen_enc = vector_length_encoding(this);
22706 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22707 }
22708 %}
22709 ins_pipe( pipe_slow );
22710 %}
22711
22712 //------------------------------------- VectorTest --------------------------------------------
22713
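// VectorTest reduces a vector (or opmask) to condition flags. The vptest
// rules handle boolean vectors held in XMM registers; the ktest rules handle
// AVX-512 opmask registers.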
22714 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22715 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22716 match(Set cr (VectorTest src1 src2));
22717 effect(TEMP vtmp);
22718 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22719 ins_encode %{
22720 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22721 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22722 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22723 %}
22724 ins_pipe( pipe_slow );
22725 %}
22726
22727 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22728 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22729 match(Set cr (VectorTest src1 src2));
22730 format %{ "vptest_ge16 $src1, $src2\n\t" %}
22731 ins_encode %{
22732 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22733 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22734 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22735 %}
22736 ins_pipe( pipe_slow );
22737 %}
22738
22739 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22740 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22741 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22742 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22743 match(Set cr (VectorTest src1 src2));
22744 effect(TEMP tmp);
22745 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22746 ins_encode %{
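    // There is no kortest form usable for masks shorter than 8 bits (the
    // 8-bit kortestbl needs AVX512DQ), so move the mask into a GPR, clear
    // the bits beyond masklen, and compare against the all-true pattern
    // (1 << masklen) - 1.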
22747 uint masklen = Matcher::vector_length(this, $src1);
22748 __ kmovwl($tmp$$Register, $src1$$KRegister);
22749 __ andl($tmp$$Register, (1 << masklen) - 1);
22750 __ cmpl($tmp$$Register, (1 << masklen) - 1);
22751 %}
22752 ins_pipe( pipe_slow );
22753 %}
22754
22755 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22756 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22757 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22758 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22759 match(Set cr (VectorTest src1 src2));
22760 effect(TEMP tmp);
22761 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22762 ins_encode %{
22763 uint masklen = Matcher::vector_length(this, $src1);
22764 __ kmovwl($tmp$$Register, $src1$$KRegister);
22765 __ andl($tmp$$Register, (1 << masklen) - 1);
22766 %}
22767 ins_pipe( pipe_slow );
22768 %}
22769
22770 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22771 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22772 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22773 match(Set cr (VectorTest src1 src2));
22774 format %{ "ktest_ge8 $src1, $src2\n\t" %}
22775 ins_encode %{
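    // kortest of the mask with itself sets CF when all tested bits are one
    // and ZF when all are zero, which serves both the alltrue and anytrue
    // checks; the masklen-specific form is selected inside kortest().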
22776 uint masklen = Matcher::vector_length(this, $src1);
22777 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22778 %}
22779 ins_pipe( pipe_slow );
22780 %}
22781
22782 //------------------------------------- LoadMask --------------------------------------------
22783
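// VectorLoadMask turns a boolean vector (one 0/1 byte per lane) into a mask
// representation: either a vector whose lanes are all-zeros/all-ones, or,
// for the kReg rules below, an AVX-512 opmask register.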
22784 instruct loadMask(legVec dst, legVec src) %{
22785 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22786 match(Set dst (VectorLoadMask src));
22787 effect(TEMP dst);
22788 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22789 ins_encode %{
22790 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22791 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22792 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22793 %}
22794 ins_pipe( pipe_slow );
22795 %}
22796
22797 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22798 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22799 match(Set dst (VectorLoadMask src));
22800 effect(TEMP xtmp);
22801 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22802 ins_encode %{
22803 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22804 true, Assembler::AVX_512bit);
22805 %}
22806 ins_pipe( pipe_slow );
22807 %}
22808
22809 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
22810 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22811 match(Set dst (VectorLoadMask src));
22812 effect(TEMP xtmp);
22813 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22814 ins_encode %{
22815 int vlen_enc = vector_length_encoding(in(1));
22816 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22817 false, vlen_enc);
22818 %}
22819 ins_pipe( pipe_slow );
22820 %}
22821
22822 //------------------------------------- StoreMask --------------------------------------------
22823
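// VectorStoreMask is the inverse of VectorLoadMask: it narrows an
// element-sized mask (all-zeros/all-ones lanes) back to one byte per lane
// holding 0 or 1; the trailing (v)pabsb maps -1 to 1.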
22824 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22825 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22826 match(Set dst (VectorStoreMask src size));
22827 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22828 ins_encode %{
22829 int vlen = Matcher::vector_length(this);
22830 if (vlen <= 16 && UseAVX <= 2) {
22831 assert(UseSSE >= 3, "required");
22832 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22833 } else {
22834 assert(UseAVX > 0, "required");
22835 int src_vlen_enc = vector_length_encoding(this, $src);
22836 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22837 }
22838 %}
22839 ins_pipe( pipe_slow );
22840 %}
22841
22842 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22843 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22844 match(Set dst (VectorStoreMask src size));
22845 effect(TEMP_DEF dst, TEMP xtmp);
22846 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22847 ins_encode %{
22848 int vlen_enc = Assembler::AVX_128bit;
22849 int vlen = Matcher::vector_length(this);
22850 if (vlen <= 8) {
22851 assert(UseSSE >= 3, "required");
22852 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22853 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22854 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22855 } else {
22856 assert(UseAVX > 0, "required");
22857 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22858 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22859 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22860 }
22861 %}
22862 ins_pipe( pipe_slow );
22863 %}
22864
22865 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22866 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22867 match(Set dst (VectorStoreMask src size));
22868 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22869 effect(TEMP_DEF dst, TEMP xtmp);
22870 ins_encode %{
22871 int vlen_enc = Assembler::AVX_128bit;
22872 int vlen = Matcher::vector_length(this);
22873 if (vlen <= 4) {
22874 assert(UseSSE >= 3, "required");
22875 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22876 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22877 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22878 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22879 } else {
22880 assert(UseAVX > 0, "required");
22881 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22882 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22883 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22884 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22885 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22886 }
22887 %}
22888 ins_pipe( pipe_slow );
22889 %}
22890
22891 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22892 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22893 match(Set dst (VectorStoreMask src size));
22894 effect(TEMP_DEF dst, TEMP xtmp);
22895 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22896 ins_encode %{
22897 assert(UseSSE >= 3, "required");
22898 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22899 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22900 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22901 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22902 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22903 %}
22904 ins_pipe( pipe_slow );
22905 %}
22906
22907 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22908 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22909 match(Set dst (VectorStoreMask src size));
22910 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22911 effect(TEMP_DEF dst, TEMP vtmp);
22912 ins_encode %{
22913 int vlen_enc = Assembler::AVX_128bit;
22914 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22915 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22916 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22917 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22918 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22919 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22920 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22921 %}
22922 ins_pipe( pipe_slow );
22923 %}
22924
22925 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22926 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22927 match(Set dst (VectorStoreMask src size));
22928 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22929 ins_encode %{
22930 int src_vlen_enc = vector_length_encoding(this, $src);
22931 int dst_vlen_enc = vector_length_encoding(this);
22932 if (!VM_Version::supports_avx512vl()) {
22933 src_vlen_enc = Assembler::AVX_512bit;
22934 }
22935 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22936 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22937 %}
22938 ins_pipe( pipe_slow );
22939 %}
22940
22941 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22942 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22943 match(Set dst (VectorStoreMask src size));
22944 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22945 ins_encode %{
22946 int src_vlen_enc = vector_length_encoding(this, $src);
22947 int dst_vlen_enc = vector_length_encoding(this);
22948 if (!VM_Version::supports_avx512vl()) {
22949 src_vlen_enc = Assembler::AVX_512bit;
22950 }
22951 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22952 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22953 %}
22954 ins_pipe( pipe_slow );
22955 %}
22956
22957 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22958 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22959 match(Set dst (VectorStoreMask mask size));
22960 effect(TEMP_DEF dst);
22961 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22962 ins_encode %{
22963 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
22964 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22965 false, Assembler::AVX_512bit, noreg);
22966 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22967 %}
22968 ins_pipe( pipe_slow );
22969 %}
22970
22971 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22972 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22973 match(Set dst (VectorStoreMask mask size));
22974 effect(TEMP_DEF dst);
22975 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22976 ins_encode %{
22977 int dst_vlen_enc = vector_length_encoding(this);
22978 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22979 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22980 %}
22981 ins_pipe( pipe_slow );
22982 %}
22983
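// A mask cast that does not change the size in bytes is a no-op, so the two
// rules below emit no code; only the size-changing vmaskcast_avx variant
// needs to do any work.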
22984 instruct vmaskcast_evex(kReg dst) %{
22985 match(Set dst (VectorMaskCast dst));
22986 ins_cost(0);
22987 format %{ "vector_mask_cast $dst" %}
22988 ins_encode %{
22989 // empty
22990 %}
22991 ins_pipe(empty);
22992 %}
22993
22994 instruct vmaskcast(vec dst) %{
22995 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22996 match(Set dst (VectorMaskCast dst));
22997 ins_cost(0);
22998 format %{ "vector_mask_cast $dst" %}
22999 ins_encode %{
23000 // empty
23001 %}
23002 ins_pipe(empty);
23003 %}
23004
23005 instruct vmaskcast_avx(vec dst, vec src) %{
23006 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23007 match(Set dst (VectorMaskCast src));
23008 format %{ "vector_mask_cast $dst, $src" %}
23009 ins_encode %{
23010 int vlen = Matcher::vector_length(this);
23011 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23012 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23013 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23014 %}
23015 ins_pipe(pipe_slow);
23016 %}
23017
23018 //-------------------------------- Load Iota Indices ----------------------------------
23019
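// VectorLoadConst with a zero source loads the iota sequence {0, 1, 2, ...}
// of the requested element type from constant memory.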
23020 instruct loadIotaIndices(vec dst, immI_0 src) %{
23021 match(Set dst (VectorLoadConst src));
23022 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23023 ins_encode %{
23024 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23025 BasicType bt = Matcher::vector_element_basic_type(this);
23026 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23027 %}
23028 ins_pipe( pipe_slow );
23029 %}
23030
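// PopulateIndex produces {src1, src1+1, src1+2, ...}: broadcast the start
// value, load the iota constant, and add the two. Only a stride ($src2) of
// one is supported, as the assert in the encoding checks.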
23031 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23032 match(Set dst (PopulateIndex src1 src2));
23033 effect(TEMP dst, TEMP vtmp);
23034 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23035 ins_encode %{
23036 assert($src2$$constant == 1, "required");
23037 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23038 int vlen_enc = vector_length_encoding(this);
23039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23040 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23041 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23042 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23043 %}
23044 ins_pipe( pipe_slow );
23045 %}
23046
23047 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23048 match(Set dst (PopulateIndex src1 src2));
23049 effect(TEMP dst, TEMP vtmp);
23050 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23051 ins_encode %{
23052 assert($src2$$constant == 1, "required");
23053 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23054 int vlen_enc = vector_length_encoding(this);
23055 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23056 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23057 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23058 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23059 %}
23060 ins_pipe( pipe_slow );
23061 %}
23062
23063 //-------------------------------- Rearrange ----------------------------------
23064
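// VectorLoadShuffle prepares an index vector and VectorRearrange permutes the
// source by it. Byte rearranges wider than 128 bits are awkward because
// (v)pshufb only shuffles within 128-bit lanes, so the wider rules below
// either blend two in-lane shuffles or rely on AVX512_VBMI vpermb.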
23065 // LoadShuffle/Rearrange for Byte
23066 instruct rearrangeB(vec dst, vec shuffle) %{
23067 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23068 Matcher::vector_length(n) < 32);
23069 match(Set dst (VectorRearrange dst shuffle));
23070 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23071 ins_encode %{
23072 assert(UseSSE >= 4, "required");
23073 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23074 %}
23075 ins_pipe( pipe_slow );
23076 %}
23077
23078 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23079 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23080 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23081 match(Set dst (VectorRearrange src shuffle));
23082 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23083 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23084 ins_encode %{
23085 assert(UseAVX >= 2, "required");
    // Swap the two 128-bit lanes of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23096 %}
23097 ins_pipe( pipe_slow );
23098 %}
23099
23100
23101 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23102 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23103 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23104 match(Set dst (VectorRearrange src shuffle));
23105 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23107 ins_encode %{
23108 int vlen_enc = vector_length_encoding(this);
23109 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23110 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23111 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23112 %}
23113 ins_pipe( pipe_slow );
23114 %}
23115
23116 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23117 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23118 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23119 match(Set dst (VectorRearrange src shuffle));
23120 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23121 ins_encode %{
23122 int vlen_enc = vector_length_encoding(this);
23123 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23124 %}
23125 ins_pipe( pipe_slow );
23126 %}
23127
23128 // LoadShuffle/Rearrange for Short
23129
23130 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23131 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23132 !VM_Version::supports_avx512bw());
23133 match(Set dst (VectorLoadShuffle src));
23134 effect(TEMP dst, TEMP vtmp);
23135 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23136 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
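    // (a short lane index i becomes the byte-index pair 2i, 2i+1;
    //  e.g. index 3 turns into bytes 6 and 7)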
23139 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23140 if (UseAVX == 0) {
23141 assert(vlen_in_bytes <= 16, "required");
23142 // Multiply each shuffle by two to get byte index
23143 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23144 __ psllw($vtmp$$XMMRegister, 1);
23145
23146 // Duplicate to create 2 copies of byte index
23147 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23148 __ psllw($dst$$XMMRegister, 8);
23149 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23150
23151 // Add one to get alternate byte index
23152 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23153 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23154 } else {
23155 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23156 int vlen_enc = vector_length_encoding(this);
23157 // Multiply each shuffle by two to get byte index
23158 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23159
23160 // Duplicate to create 2 copies of byte index
23161 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23162 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23163
23164 // Add one to get alternate byte index
23165 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23166 }
23167 %}
23168 ins_pipe( pipe_slow );
23169 %}
23170
23171 instruct rearrangeS(vec dst, vec shuffle) %{
23172 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23173 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23174 match(Set dst (VectorRearrange dst shuffle));
23175 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23176 ins_encode %{
23177 assert(UseSSE >= 4, "required");
23178 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23179 %}
23180 ins_pipe( pipe_slow );
23181 %}
23182
23183 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23184 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23185 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23186 match(Set dst (VectorRearrange src shuffle));
23187 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23188 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23189 ins_encode %{
23190 assert(UseAVX >= 2, "required");
    // Swap the two 128-bit lanes of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23201 %}
23202 ins_pipe( pipe_slow );
23203 %}
23204
23205 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23206 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23207 VM_Version::supports_avx512bw());
23208 match(Set dst (VectorRearrange src shuffle));
23209 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23210 ins_encode %{
23211 int vlen_enc = vector_length_encoding(this);
23212 if (!VM_Version::supports_avx512vl()) {
23213 vlen_enc = Assembler::AVX_512bit;
23214 }
23215 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23216 %}
23217 ins_pipe( pipe_slow );
23218 %}
23219
23220 // LoadShuffle/Rearrange for Integer and Float
23221
23222 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23223 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23224 Matcher::vector_length(n) == 4 && UseAVX == 0);
23225 match(Set dst (VectorLoadShuffle src));
23226 effect(TEMP dst, TEMP vtmp);
23227 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23228 ins_encode %{
23229 assert(UseSSE >= 4, "required");
23230
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
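    // (an int lane index i becomes the byte indices 4i .. 4i+3;
    //  e.g. index 2 turns into bytes 8, 9, 10, 11)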
23233
23234 // Duplicate and multiply each shuffle by 4
23235 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23236 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23237 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23238 __ psllw($vtmp$$XMMRegister, 2);
23239
23240 // Duplicate again to create 4 copies of byte index
23241 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23242 __ psllw($dst$$XMMRegister, 8);
23243 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23244
23245 // Add 3,2,1,0 to get alternate byte index
23246 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23247 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23248 %}
23249 ins_pipe( pipe_slow );
23250 %}
23251
23252 instruct rearrangeI(vec dst, vec shuffle) %{
23253 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23254 UseAVX == 0);
23255 match(Set dst (VectorRearrange dst shuffle));
23256 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23257 ins_encode %{
23258 assert(UseSSE >= 4, "required");
23259 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23260 %}
23261 ins_pipe( pipe_slow );
23262 %}
23263
23264 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23265 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23266 UseAVX > 0);
23267 match(Set dst (VectorRearrange src shuffle));
23268 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23269 ins_encode %{
23270 int vlen_enc = vector_length_encoding(this);
23271 BasicType bt = Matcher::vector_element_basic_type(this);
23272 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23273 %}
23274 ins_pipe( pipe_slow );
23275 %}
23276
23277 // LoadShuffle/Rearrange for Long and Double
23278
23279 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23280 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23281 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23282 match(Set dst (VectorLoadShuffle src));
23283 effect(TEMP dst, TEMP vtmp);
23284 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23285 ins_encode %{
23286 assert(UseAVX >= 2, "required");
23287
23288 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms
23291
23292 // Multiply each shuffle by two to get double word index
23293 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23294
23295 // Duplicate each double word shuffle
23296 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23297 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23298
23299 // Add one to get alternate double word index
23300 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23301 %}
23302 ins_pipe( pipe_slow );
23303 %}
23304
23305 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23306 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23307 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23308 match(Set dst (VectorRearrange src shuffle));
23309 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23310 ins_encode %{
23311 assert(UseAVX >= 2, "required");
23312
23313 int vlen_enc = vector_length_encoding(this);
23314 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23315 %}
23316 ins_pipe( pipe_slow );
23317 %}
23318
23319 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23320 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23321 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23322 match(Set dst (VectorRearrange src shuffle));
23323 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23324 ins_encode %{
23325 assert(UseAVX > 2, "required");
23326
23327 int vlen_enc = vector_length_encoding(this);
23328 if (vlen_enc == Assembler::AVX_128bit) {
23329 vlen_enc = Assembler::AVX_256bit;
23330 }
23331 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23332 %}
23333 ins_pipe( pipe_slow );
23334 %}
23335
23336 // --------------------------------- FMA --------------------------------------
23337 // a * b + c
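// Note that the accumulator c is both an input and the destination
// (Set c (FmaV* c (Binary a b))), so the fused multiply-add needs no
// extra move around it.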
23338
23339 instruct vfmaF_reg(vec a, vec b, vec c) %{
23340 match(Set c (FmaVF c (Binary a b)));
23341 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23342 ins_cost(150);
23343 ins_encode %{
23344 assert(UseFMA, "not enabled");
23345 int vlen_enc = vector_length_encoding(this);
23346 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23347 %}
23348 ins_pipe( pipe_slow );
23349 %}
23350
23351 instruct vfmaF_mem(vec a, memory b, vec c) %{
23352 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23353 match(Set c (FmaVF c (Binary a (LoadVector b))));
23354 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23355 ins_cost(150);
23356 ins_encode %{
23357 assert(UseFMA, "not enabled");
23358 int vlen_enc = vector_length_encoding(this);
23359 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23360 %}
23361 ins_pipe( pipe_slow );
23362 %}
23363
23364 instruct vfmaD_reg(vec a, vec b, vec c) %{
23365 match(Set c (FmaVD c (Binary a b)));
23366 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23367 ins_cost(150);
23368 ins_encode %{
23369 assert(UseFMA, "not enabled");
23370 int vlen_enc = vector_length_encoding(this);
23371 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23372 %}
23373 ins_pipe( pipe_slow );
23374 %}
23375
23376 instruct vfmaD_mem(vec a, memory b, vec c) %{
23377 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23378 match(Set c (FmaVD c (Binary a (LoadVector b))));
23379 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23380 ins_cost(150);
23381 ins_encode %{
23382 assert(UseFMA, "not enabled");
23383 int vlen_enc = vector_length_encoding(this);
23384 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23385 %}
23386 ins_pipe( pipe_slow );
23387 %}
23388
23389 // --------------------------------- Vector Multiply Add --------------------------------------
23390
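// (v)pmaddwd multiplies adjacent pairs of signed 16-bit elements and sums
// each pair into a 32-bit lane, which is exactly the semantics of the
// MulAddVS2VI node.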
23391 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23392 predicate(UseAVX == 0);
23393 match(Set dst (MulAddVS2VI dst src1));
23394 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23395 ins_encode %{
23396 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23397 %}
23398 ins_pipe( pipe_slow );
23399 %}
23400
23401 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23402 predicate(UseAVX > 0);
23403 match(Set dst (MulAddVS2VI src1 src2));
23404 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23405 ins_encode %{
23406 int vlen_enc = vector_length_encoding(this);
23407 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23408 %}
23409 ins_pipe( pipe_slow );
23410 %}
23411
23412 // --------------------------------- Vector Multiply Add Add ----------------------------------
23413
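// With AVX512_VNNI the pmaddwd + vpaddd pair folds into a single evpdpwssd
// (word dot-product accumulated into the 32-bit lanes of dst); the low
// ins_cost makes this rule win over the two-instruction sequence.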
23414 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23415 predicate(VM_Version::supports_avx512_vnni());
23416 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23417 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23418 ins_encode %{
23419 assert(UseAVX > 2, "required");
23420 int vlen_enc = vector_length_encoding(this);
23421 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23422 %}
23423 ins_pipe( pipe_slow );
23424 ins_cost(10);
23425 %}
23426
23427 // --------------------------------- PopCount --------------------------------------
23428
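// PopCountVI/PopCountVL lower to the direct AVX-512 population count
// instructions when is_vector_popcount_predicate holds for the element
// type; otherwise the vector_popcount_integral fallback emulates the count
// with temporaries.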
23429 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23430 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23431 match(Set dst (PopCountVI src));
23432 match(Set dst (PopCountVL src));
23433 format %{ "vector_popcount_integral $dst, $src" %}
23434 ins_encode %{
23436 int vlen_enc = vector_length_encoding(this, $src);
23437 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23438 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23439 %}
23440 ins_pipe( pipe_slow );
23441 %}
23442
23443 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23444 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23445 match(Set dst (PopCountVI src mask));
23446 match(Set dst (PopCountVL src mask));
23447 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23448 ins_encode %{
23449 int vlen_enc = vector_length_encoding(this, $src);
23450 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23451 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23452 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23453 %}
23454 ins_pipe( pipe_slow );
23455 %}
23456
23457 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23458 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23459 match(Set dst (PopCountVI src));
23460 match(Set dst (PopCountVL src));
23461 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23462 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23463 ins_encode %{
23465 int vlen_enc = vector_length_encoding(this, $src);
23466 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23467 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23468 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23469 %}
23470 ins_pipe( pipe_slow );
23471 %}
23472
23473 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23474
23475 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23476 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23477 Matcher::vector_length_in_bytes(n->in(1))));
23478 match(Set dst (CountTrailingZerosV src));
23479 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23480 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23482 ins_encode %{
23483 int vlen_enc = vector_length_encoding(this, $src);
23484 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23485 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23486 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23487 %}
23488 ins_pipe( pipe_slow );
23489 %}
23490
23491 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23492 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23493 VM_Version::supports_avx512cd() &&
23494 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23495 match(Set dst (CountTrailingZerosV src));
23496 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23497 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23499 ins_encode %{
23500 int vlen_enc = vector_length_encoding(this, $src);
23501 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23502 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23503 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23504 %}
23505 ins_pipe( pipe_slow );
23506 %}
23507
23508 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23509 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23510 match(Set dst (CountTrailingZerosV src));
23511 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23512 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23514 ins_encode %{
23515 int vlen_enc = vector_length_encoding(this, $src);
23516 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23517 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23518 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23519 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23520 %}
23521 ins_pipe( pipe_slow );
23522 %}
23523
23524 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23525 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23526 match(Set dst (CountTrailingZerosV src));
23527 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23528 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23529 ins_encode %{
23530 int vlen_enc = vector_length_encoding(this, $src);
23531 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23532 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23533 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23534 %}
23535 ins_pipe( pipe_slow );
23536 %}
23537
23538
23539 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23540
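// vpternlog evaluates an arbitrary three-input boolean function bitwise:
// $func is an 8-bit truth table indexed by the corresponding bits of the
// three sources (for example, 0xE8 encodes the majority function).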
23541 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23542 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23543 effect(TEMP dst);
23544 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23545 ins_encode %{
23546 int vector_len = vector_length_encoding(this);
23547 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23548 %}
23549 ins_pipe( pipe_slow );
23550 %}
23551
23552 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23553 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23554 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23555 effect(TEMP dst);
23556 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23557 ins_encode %{
23558 int vector_len = vector_length_encoding(this);
23559 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23560 %}
23561 ins_pipe( pipe_slow );
23562 %}
23563
23564 // --------------------------------- Rotation Operations ----------------------------------
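// Rotations come in an immediate-count and a variable-count form. The macro
// assembler emits the direct AVX-512 rotate instructions where the target
// supports them and otherwise emulates the rotate with a shift/shift/or
// sequence.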
23565 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23566 match(Set dst (RotateLeftV src shift));
23567 match(Set dst (RotateRightV src shift));
23568 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23569 ins_encode %{
23570 int opcode = this->ideal_Opcode();
23571 int vector_len = vector_length_encoding(this);
23572 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23573 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23574 %}
23575 ins_pipe( pipe_slow );
23576 %}
23577
23578 instruct vprorate(vec dst, vec src, vec shift) %{
23579 match(Set dst (RotateLeftV src shift));
23580 match(Set dst (RotateRightV src shift));
23581 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23582 ins_encode %{
23583 int opcode = this->ideal_Opcode();
23584 int vector_len = vector_length_encoding(this);
23585 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23586 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23587 %}
23588 ins_pipe( pipe_slow );
23589 %}
23590
23591 // ---------------------------------- Masked Operations ------------------------------------
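// Masked loads and stores come in two flavors: an AVX form whose mask lives
// in an XMM/YMM register (vmovmask, non-subword elements only) and an EVEX
// form whose mask is a kReg opmask (evmovdqu).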
23592 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23593 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23594 match(Set dst (LoadVectorMasked mem mask));
23595 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23596 ins_encode %{
23597 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23598 int vlen_enc = vector_length_encoding(this);
23599 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23600 %}
23601 ins_pipe( pipe_slow );
23602 %}
23603
23604
23605 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23606 predicate(n->in(3)->bottom_type()->isa_vectmask());
23607 match(Set dst (LoadVectorMasked mem mask));
23608 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23609 ins_encode %{
23610 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23611 int vector_len = vector_length_encoding(this);
23612 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23613 %}
23614 ins_pipe( pipe_slow );
23615 %}
23616
23617 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23618 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23619 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23620 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23621 ins_encode %{
23622 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23623 int vlen_enc = vector_length_encoding(src_node);
23624 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23625 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23626 %}
23627 ins_pipe( pipe_slow );
23628 %}
23629
23630 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23631 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23632 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23633 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23634 ins_encode %{
23635 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23636 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23637 int vlen_enc = vector_length_encoding(src_node);
23638 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23639 %}
23640 ins_pipe( pipe_slow );
23641 %}
23642
23643 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23644 match(Set addr (VerifyVectorAlignment addr mask));
23645 effect(KILL cr);
23646 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23647 ins_encode %{
23648 Label Lskip;
23649 // check if masked bits of addr are zero
23650 __ testq($addr$$Register, $mask$$constant);
23651 __ jccb(Assembler::equal, Lskip);
23652 __ stop("verify_vector_alignment found a misaligned vector memory access");
23653 __ bind(Lskip);
23654 %}
23655 ins_pipe(pipe_slow);
23656 %}
23657
23658 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23659 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23660 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23661 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23662 ins_encode %{
23663 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23664 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
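    // If every lane is either outside the mask or compares equal (kortest
    // sets CF), the result is -1; otherwise return the index of the first
    // lane that differs (tzcnt of the inverted equality mask).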
23665
23666 Label DONE;
23667 int vlen_enc = vector_length_encoding(this, $src1);
23668 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23669
23670 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23671 __ mov64($dst$$Register, -1L);
23672 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23673 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23674 __ jccb(Assembler::carrySet, DONE);
23675 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23676 __ notq($dst$$Register);
23677 __ tzcntq($dst$$Register, $dst$$Register);
23678 __ bind(DONE);
23679 %}
23680 ins_pipe( pipe_slow );
23681 %}
23682
23683
23684 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23685 match(Set dst (VectorMaskGen len));
23686 effect(TEMP temp, KILL cr);
23687 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23688 ins_encode %{
23689 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23690 %}
23691 ins_pipe( pipe_slow );
23692 %}
23693
23694 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23695 match(Set dst (VectorMaskGen len));
23696 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23697 effect(TEMP temp);
23698 ins_encode %{
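    // Materialize a mask with the low $len bits set: shifting the all-ones
    // pattern right by (64 - len) leaves exactly len ones.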
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23700 __ kmovql($dst$$KRegister, $temp$$Register);
23701 %}
23702 ins_pipe( pipe_slow );
23703 %}
23704
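// The mask query operations below (toLong, trueCount, first/lastTrue) share
// the vector_mask_operation macro-assembler entry point, dispatched on the
// ideal opcode; the *_evex rules take the mask in a kReg, while the *_bool
// and *_avx rules take it as a boolean vector.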
23705 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23706 predicate(n->in(1)->bottom_type()->isa_vectmask());
23707 match(Set dst (VectorMaskToLong mask));
23708 effect(TEMP dst, KILL cr);
23709 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23710 ins_encode %{
23711 int opcode = this->ideal_Opcode();
23712 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23713 int mask_len = Matcher::vector_length(this, $mask);
23714 int mask_size = mask_len * type2aelembytes(mbt);
23715 int vlen_enc = vector_length_encoding(this, $mask);
23716 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23717 $dst$$Register, mask_len, mask_size, vlen_enc);
23718 %}
23719 ins_pipe( pipe_slow );
23720 %}
23721
23722 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23723 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23724 match(Set dst (VectorMaskToLong mask));
23725 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23726 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23727 ins_encode %{
23728 int opcode = this->ideal_Opcode();
23729 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23730 int mask_len = Matcher::vector_length(this, $mask);
23731 int vlen_enc = vector_length_encoding(this, $mask);
23732 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23733 $dst$$Register, mask_len, mbt, vlen_enc);
23734 %}
23735 ins_pipe( pipe_slow );
23736 %}
23737
23738 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23739 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23740 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23741 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23742 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23743 ins_encode %{
23744 int opcode = this->ideal_Opcode();
23745 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23746 int mask_len = Matcher::vector_length(this, $mask);
23747 int vlen_enc = vector_length_encoding(this, $mask);
23748 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23749 $dst$$Register, mask_len, mbt, vlen_enc);
23750 %}
23751 ins_pipe( pipe_slow );
23752 %}
23753
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

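// VectorMaskFirstTrue yields the index of the lowest set lane (or the vector
// length when no lane is set); VectorMaskLastTrue the highest set lane (or -1
// when none is). Each mask representation shares one instruct for both
// queries, dispatching on the ideal opcode inside vector_mask_operation.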
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
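// CompressV gathers the lanes selected by the mask into the low end of the
// result; ExpandV is the inverse, scattering consecutive low source lanes
// into the selected positions (unselected lanes are zeroed). Roughly, for
// src = [a,b,c,d] with lanes {0,2} selected:
//   compress -> [a,c,0,0]        expand -> [a,0,b,0]
// With AVX512(VL) this maps onto the vpcompress/vpexpand family; the AVX2
// fallback below synthesizes a permutation from the mask, hence its TEMPs.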
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, /* merge */ false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

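// ReverseV reverses the bit order within each lane; ReverseBytesV only the
// byte order. The GFNI variant bit-reverses each byte in one VGF2P8AFFINEQB
// against the matrix constant 0x8040201008040201 (e.g. 0b00000011 ->
// 0b11000000), with wider lanes additionally getting a byte swap; without
// GFNI the macro assembler falls back to shift-and-mask sequences, which is
// what the extra TEMPs are for.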
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

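// With AVX512CD the int/long cases lower directly to VPLZCNTD/VPLZCNTQ; the
// masked form first copies $src into $dst so that merge-masking preserves
// the unselected lanes. Subword and pre-AVX512 shapes are synthesized from
// wider counts in the macro assembler, hence the extra vector/scalar TEMPs.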
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------

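// All masked instructs below share one shape: the destination doubles as the
// first source, and evmasked_op applies the operation under the kReg mask
// with merge semantics, i.e. per lane
//   dst[i] = mask[i] ? op(dst[i], src2[i]) : dst[i]
// The _mem variants are identical except that they fold a LoadVector into
// the second operand.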
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

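// Two register-shift flavors follow: when !is_var_shift the shift count is
// uniform (broadcast in src2), while the is_var_shift forms shift every lane
// by its own count; the trailing boolean handed to evmasked_op selects
// between the two encodings.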
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch on src1's element type: integer lanes compare with
    // evpcmp{b,w,d,q} using a signed or unsigned predicate, floating-point
    // lanes with evcmpps/evcmppd.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

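// Mask negation arrives as XorVMask with an all-ones MaskAll and lowers to
// knot; below 8 lanes the extra ktmp/rtmp are used to keep the unused high
// bits of the k register cleared after the flip.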
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

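// CastVV is a compile-time type adjustment only; all three register classes
// get a zero-size, empty encoding.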
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

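// IsInfinite lowers to VFPCLASSSS/VFPCLASSSD: imm8 0x18 selects the +Inf
// (bit 3) and -Inf (bit 4) classes, and the resulting one-bit class mask is
// moved from the k register into the integer result.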
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

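// Saturating add/sub: byte and short lanes have native saturating SIMD forms
// (paddsb/paddusb and friends), so the subword instructs defer directly to
// vector_saturating_op. Int and long lanes have no such instructions; the
// _evex/_avx variants further down synthesize saturation by detecting
// overflow and blending in the clamped value, which accounts for their TEMPs.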
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

25012 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25013 %{
25014 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25015 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25016 ins_encode %{
25017 int vlen_enc = vector_length_encoding(this);
25018 BasicType bt = Matcher::vector_element_basic_type(this);
25019 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25020 %}
25021 ins_pipe(pipe_slow);
25022 %}
25023
25024 instruct reinterpretS2HF(regF dst, rRegI src)
25025 %{
25026 match(Set dst (ReinterpretS2HF src));
25027 format %{ "vmovw $dst, $src" %}
25028 ins_encode %{
25029 __ vmovw($dst$$XMMRegister, $src$$Register);
25030 %}
25031 ins_pipe(pipe_slow);
25032 %}
25033
25034 instruct reinterpretHF2S(rRegI dst, regF src)
25035 %{
25036 match(Set dst (ReinterpretHF2S src));
25037 format %{ "vmovw $dst, $src" %}
25038 ins_encode %{
25039 __ vmovw($dst$$Register, $src$$XMMRegister);
25040 %}
25041 ins_pipe(pipe_slow);
25042 %}
25043
25044 instruct convF2HFAndS2HF(regF dst, regF src)
25045 %{
25046 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25047 format %{ "convF2HFAndS2HF $dst, $src" %}
25048 ins_encode %{
25049 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25050 %}
25051 ins_pipe(pipe_slow);
25052 %}
25053
25054 instruct convHF2SAndHF2F(regF dst, regF src)
25055 %{
25056 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25057 format %{ "convHF2SAndHF2F $dst, $src" %}
25058 ins_encode %{
25059 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25060 %}
25061 ins_pipe(pipe_slow);
25062 %}
25063
25064 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25065 %{
25066 match(Set dst (SqrtHF src));
25067 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25068 ins_encode %{
25069 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25070 %}
25071 ins_pipe(pipe_slow);
25072 %}
25073
25074 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25075 %{
25076 match(Set dst (AddHF src1 src2));
25077 match(Set dst (DivHF src1 src2));
25078 match(Set dst (MulHF src1 src2));
25079 match(Set dst (SubHF src1 src2));
25080 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25081 ins_encode %{
25082 int opcode = this->ideal_Opcode();
25083 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25084 %}
25085 ins_pipe(pipe_slow);
25086 %}
25087
25088 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25089 %{
25090 predicate(VM_Version::supports_avx10_2());
25091 match(Set dst (MaxHF src1 src2));
25092 match(Set dst (MinHF src1 src2));
25093 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25094 ins_encode %{
25095 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25096 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25097 %}
25098 ins_pipe( pipe_slow );
25099 %}
25100
25101 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25102 %{
25103 predicate(!VM_Version::supports_avx10_2());
25104 match(Set dst (MaxHF src1 src2));
25105 match(Set dst (MinHF src1 src2));
25106 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25107 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25108 ins_encode %{
25109 int opcode = this->ideal_Opcode();
25110 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25111 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25112 %}
25113 ins_pipe( pipe_slow );
25114 %}
25115
25116 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25117 %{
25118 match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}


instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate holds
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature bool (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch. The procedure returns true if the transformation
// // succeeded, and false otherwise.
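// //
// // An illustrative sketch only (the name and body below are hypothetical;
// // the real procedures, such as lea_coalesce_reg used further down, live in
// // the architecture-dependent peephole file):
// //
// //   bool my_peephole_proc(Block* block, int index, PhaseRegAlloc* ra_,
// //                         MachNode* (*root_fn)(), int inst0_rule) {
// //     MachNode* inst0 = block->get_node(index)->as_Mach();   // matched root
// //     if (inst0->rule() != (uint)inst0_rule) return false;   // shape check
// //     MachNode* root = root_fn();       // the node named in peepreplace
// //     // ... verify constraints, wire up root's operands, splice it in ...
// //     return true;                      // transformation applied
// //   }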
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
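//
// For example, -XX:OptoPeepholeAt=3 (an arbitrary rule number, chosen here
// purely for illustration) runs only the peephole rule numbered 3 and
// disables all the others.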
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole?)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexibility in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of these nodes inside are passed into the function below
//   peepmatch ( incI_rReg movI );
//   // the method that takes the responsibility of transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the function above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions are not matched by the matcher but are used by the peephole optimizer
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally beneficial only on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
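//
// As an illustrative sketch (registers chosen arbitrarily), the reg-reg int
// rule rewrites a register move feeding an add:
//
//   movl rdx, rax           // movI
//   addl rdx, rbx           // addI_rReg
//
// into a single three-operand form:
//
//   leal rdx, [rax + rbx]   // leaI_rReg_rReg_peep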

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}

// These peephole rules match instructions that set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (such as JCC or CMOV)
// only use flags that are already set by the previous instruction.
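//
// As an illustrative sketch (registers chosen arbitrarily):
//
//   andl  rax, rbx          // already sets ZF and SF
//   testl rax, rax          // testI_reg, recomputes the same flags
//   je    L_done
//
// Here the test can be removed, because the jump reads only flags that the
// preceding and has already produced.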

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}


//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.