//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
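//
// For example, "reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());" below
// declares RAX save-on-call under both the Java and C conventions, spilled
// with LoadI/StoreI, and encoded as 0 in opcodes; RAX_H names the adjacent
// VMReg slot so that 64-bit values occupy the (RAX, RAX_H) pair.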

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously, RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE
// registers.
reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
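//
// Hence the caller-saved scratch registers R10 and R11 lead chunk0 below,
// while RSP, which is fixed as the stack pointer, comes last.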

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8, R8_H,
                   R9, R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics, UseXMMForArrayCopy
// and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());

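// k0 is deliberately not defined here: in EVEX encodings a mask-field value
// of zero means "no masking", so k0 cannot act as a general predicate
// register and is kept out of the allocator's view.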

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8, R8_H,
                  R9, R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs
// (excludes RSP and R15, the TLS pointer).
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
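
// The _*_mask values returned above are RegMask objects that are built once
// during startup in this file's source block (outside this excerpt). A
// minimal sketch of the pattern, assuming the usual RegMask/OptoReg API;
// the exact statements in the source block may differ:
//
//   _INT_REG_mask = _ALL_INT_REG_mask;
//   _INT_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));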

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// The flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
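// A reg_class_dynamic resolves to its first (evex) class when the predicate
// holds at runtime and to its second (legacy) class otherwise, so the same
// instruction patterns work with either register file.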
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for evex 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
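// Returns true if both bounds of the CastLL's long type either are unbounded
// or fit in a signed 32-bit immediate, i.e. the range check can be encoded
// with imm32 operands.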
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
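// R12 is reserved as the heap base register when compressed oops are in use.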
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
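// Build the derived register masks from the adlc-generated base masks,
// removing registers that are reserved in the current configuration:
// rbp (PreserveFramePointer), r12 (compressed-oops heap base) and the
// APX extended GPRs r16-r31 when UseAPX is off.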
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
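// Size in bytes of the vzeroupper instruction, or 0 if none is emitted.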
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched atomically.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched atomically.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
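// Materialize a three-way compare result in 'dst' from the flags set by
// ucomiss/ucomisd: -1 if less or unordered, 0 if equal, 1 if greater.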
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
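// The +/-0.0 case is disambiguated bitwise: por for min (so -0.0 wins) and
// pand for max (so +0.0 wins); an unordered compare yields the canonical NaN.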
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1967 // Remove word for return adr already pushed
1968 // and RBP
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
2014 enum RC {
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
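// Map an allocator register to its register class: general purpose (rc_int),
// opmask (rc_kreg), xmm (rc_float) or a stack slot (rc_stack).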
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
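// Copy a vector value between two stack slots, either with push/pop pairs or
// via a scratch register (rax or xmm0) that is saved to and restored from
// the area just below rsp.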
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
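      // vector_len argument 2 selects Assembler::AVX_512bit, i.e. a full 512-bit zmm move.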
2072 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2073 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2074 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2075 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
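// Emit a spill copy between any pair of {stack slot, gpr, xmm, opmask}
// locations, or print it to 'st' when 'masm' is null. The node size is
// computed separately, so every path returns 0.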
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
2161 int stack_offset = ra_->reg2offset(src_first);
2162 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
2243 // mem-> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
2502 // mem -> kreg
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
  assert(false, "unhandled spill combination");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
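// Vector values are returned in XMM0; the 'hi' half of the returned pair is
// the last 32-bit slot the value occupies (XMM0b/d/h/p for 64/128/256/512-bit
// vectors).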
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
2649 // this method should return false for offset 0.
2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
2654 offset -= br_size;
2655
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less.
2658 if (rule == jmpConUCF2_rule)
2659 return (-126 <= offset && offset <= 125);
2660 return (-128 <= offset && offset <= 127);
2661 }
2662
2663 // Return whether or not this register is ever used as an argument.
2664 // This function is used on startup to build the trampoline stubs in
2665 // generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
2667 // available to the callee.
2668 bool Matcher::can_be_java_arg(int reg)
2669 {
2670 return
2671 reg == RDI_num || reg == RDI_H_num ||
2672 reg == RSI_num || reg == RSI_H_num ||
2673 reg == RDX_num || reg == RDX_H_num ||
2674 reg == RCX_num || reg == RCX_H_num ||
2675 reg == R8_num || reg == R8_H_num ||
2676 reg == R9_num || reg == R9_H_num ||
2677 reg == R12_num || reg == R12_H_num ||
2678 reg == XMM0_num || reg == XMM0b_num ||
2679 reg == XMM1_num || reg == XMM1b_num ||
2680 reg == XMM2_num || reg == XMM2b_num ||
2681 reg == XMM3_num || reg == XMM3b_num ||
2682 reg == XMM4_num || reg == XMM4b_num ||
2683 reg == XMM5_num || reg == XMM5b_num ||
2684 reg == XMM6_num || reg == XMM6b_num ||
2685 reg == XMM7_num || reg == XMM7b_num;
2686 }
2687
2688 bool Matcher::is_spillable_arg(int reg)
2689 {
2690 return can_be_java_arg(reg);
2691 }
2692
2693 uint Matcher::int_pressure_limit()
2694 {
2695 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2696 }
2697
2698 uint Matcher::float_pressure_limit()
2699 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register-pressure scheduling on x64.
2702 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2703 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2704 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2705 }
2706
2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is
  // constant is faster than the hardware DIV instruction (it uses
  // MulHiL).
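  // For example, a constant divisor of 3 is strength-reduced to a MulHiL by
  // the magic constant 0x5555555555555556 plus fix-up shifts.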
2711 return false;
2712 }
2713
2714 // Register for DIVI projection of divmodI
2715 const RegMask& Matcher::divI_proj_mask() {
2716 return INT_RAX_REG_mask();
2717 }
2718
2719 // Register for MODI projection of divmodI
2720 const RegMask& Matcher::modI_proj_mask() {
2721 return INT_RDX_REG_mask();
2722 }
2723
2724 // Register for DIVL projection of divmodL
2725 const RegMask& Matcher::divL_proj_mask() {
2726 return LONG_RAX_REG_mask();
2727 }
2728
2729 // Register for MODL projection of divmodL
2730 const RegMask& Matcher::modL_proj_mask() {
2731 return LONG_RDX_REG_mask();
2732 }
2733
2734 %}
2735
2736 source_hpp %{
2737 // Header information of the source block.
2738 // Method declarations/definitions which are used outside
2739 // the ad-scope can conveniently be defined here.
2740 //
2741 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} blocks freely as needed.
2743
2744 #include "runtime/vm_version.hpp"
2745
2746 class NativeJump;
2747
2748 class CallStubImpl {
2749
2750 //--------------------------------------------------------------
2751 //---< Used for optimization in Compile::shorten_branches >---
2752 //--------------------------------------------------------------
2753
2754 public:
2755 // Size of call trampoline stub.
2756 static uint size_call_trampoline() {
2757 return 0; // no call trampolines on this platform
2758 }
2759
2760 // number of relocations needed by a call trampoline stub
2761 static uint reloc_call_trampoline() {
2762 return 0; // no call trampolines on this platform
2763 }
2764 };
2765
2766 class HandlerImpl {
2767
2768 public:
2769
2770 static int emit_deopt_handler(C2_MacroAssembler* masm);
2771
  static uint size_deopt_handler() {
    // One call (E8 + rel32 = 5 bytes) and one short jmp (EB + rel8 = 2 bytes).
    return 7;
  }
2776 };
2777
2778 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2779 switch(bytes) {
2780 case 4: // fall-through
2781 case 8: // fall-through
2782 case 16: return Assembler::AVX_128bit;
2783 case 32: return Assembler::AVX_256bit;
2784 case 64: return Assembler::AVX_512bit;
2785
2786 default: {
2787 ShouldNotReachHere();
2788 return Assembler::AVX_NoVec;
2789 }
2790 }
2791 }
2792
2793 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2794 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2795 }
2796
2797 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2798 uint def_idx = use->operand_index(opnd);
2799 Node* def = use->in(def_idx);
2800 return vector_length_encoding(def);
2801 }
2802
2803 static inline bool is_vector_popcount_predicate(BasicType bt) {
2804 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2805 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2806 }
2807
2808 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2809 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2810 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2811 }
2812
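// Platform-dependent node flags. The first tags nodes affected by the Intel
// JCC erratum mitigation; the rest record which EFLAGS condition bits an
// instruction is known to set or clear, so platform code can reason about
// redundant flag updates.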
2813 class Node::PD {
2814 public:
2815 enum NodeFlags {
2816 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2817 Flag_sets_carry_flag = Node::_last_flag << 2,
2818 Flag_sets_parity_flag = Node::_last_flag << 3,
2819 Flag_sets_zero_flag = Node::_last_flag << 4,
2820 Flag_sets_overflow_flag = Node::_last_flag << 5,
2821 Flag_sets_sign_flag = Node::_last_flag << 6,
2822 Flag_clears_carry_flag = Node::_last_flag << 7,
2823 Flag_clears_parity_flag = Node::_last_flag << 8,
2824 Flag_clears_zero_flag = Node::_last_flag << 9,
2825 Flag_clears_overflow_flag = Node::_last_flag << 10,
2826 Flag_clears_sign_flag = Node::_last_flag << 11,
2827 _last_flag = Flag_clears_sign_flag
2828 };
2829 };
2830
2831 %} // end source_hpp
2832
2833 source %{
2834
2835 #include "opto/addnode.hpp"
2836 #include "c2_intelJccErratum_x86.hpp"
2837
2838 void PhaseOutput::pd_perform_mach_node_analysis() {
2839 if (VM_Version::has_intel_jcc_erratum()) {
2840 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2841 _buf_sizes._code += extra_padding;
2842 }
2843 }
2844
2845 int MachNode::pd_alignment_required() const {
2846 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2847 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2848 return IntelJccErratum::largest_jcc_size() + 1;
2849 } else {
2850 return 1;
2851 }
2852 }
2853
2854 int MachNode::compute_padding(int current_offset) const {
2855 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2856 Compile* C = Compile::current();
2857 PhaseOutput* output = C->output();
2858 Block* block = output->block();
2859 int index = output->index();
2860 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2861 } else {
2862 return 0;
2863 }
2864 }
2865
2866 // Emit deopt handler code.
2867 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2868
2869 // Note that the code buffer's insts_mark is always relative to insts.
2870 // That's why we must use the macroassembler to generate a handler.
2871 address base = __ start_a_stub(size_deopt_handler());
2872 if (base == nullptr) {
2873 ciEnv::current()->record_failure("CodeCache is full");
2874 return 0; // CodeBuffer::expand failed
2875 }
2876 int offset = __ offset();
2877
2878 Label start;
2879 __ bind(start);
2880
2881 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2882
2883 int entry_offset = __ offset();
2884
2885 __ jmp(start);
2886
2887 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2888 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2889 "out of bounds read in post-call NOP check");
2890 __ end_a_stub();
2891 return entry_offset;
2892 }
2893
2894 static Assembler::Width widthForType(BasicType bt) {
2895 if (bt == T_BYTE) {
2896 return Assembler::B;
2897 } else if (bt == T_SHORT) {
2898 return Assembler::W;
2899 } else if (bt == T_INT) {
2900 return Assembler::D;
2901 } else {
2902 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2903 return Assembler::Q;
2904 }
2905 }
2906
2907 //=============================================================================
2908
2909 // Float masks come from different places depending on platform.
2910 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2911 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2912 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2913 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2914 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2915 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2916 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2917 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2918 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2919 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2920 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2921 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2922 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2923 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2924 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2925 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2926 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2927 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2928 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2929
2930 //=============================================================================
2931 bool Matcher::match_rule_supported(int opcode) {
2932 if (!has_match_rule(opcode)) {
2933 return false; // no match rule present
2934 }
2935 switch (opcode) {
2936 case Op_AbsVL:
2937 case Op_StoreVectorScatter:
2938 if (UseAVX < 3) {
2939 return false;
2940 }
2941 break;
2942 case Op_PopCountI:
2943 case Op_PopCountL:
2944 if (!UsePopCountInstruction) {
2945 return false;
2946 }
2947 break;
    case Op_PopCountVI:
    case Op_PopCountVL:
    case Op_CompressV:
    case Op_ExpandV:
      if (UseAVX < 2) {
        return false;
      }
      break;
2960 case Op_MulVI:
2961 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
2962 return false;
2963 }
2964 break;
2965 case Op_MulVL:
2966 if (UseSSE < 4) { // only with SSE4_1 or AVX
2967 return false;
2968 }
2969 break;
2970 case Op_MulReductionVL:
2971 if (VM_Version::supports_avx512dq() == false) {
2972 return false;
2973 }
2974 break;
2975 case Op_AbsVB:
2976 case Op_AbsVS:
2977 case Op_AbsVI:
2978 case Op_AddReductionVI:
2979 case Op_AndReductionV:
2980 case Op_OrReductionV:
2981 case Op_XorReductionV:
2982 if (UseSSE < 3) { // requires at least SSSE3
2983 return false;
2984 }
2985 break;
2986 case Op_MaxHF:
2987 case Op_MinHF:
2988 if (!VM_Version::supports_avx512vlbw()) {
2989 return false;
2990 } // fallthrough
2991 case Op_AddHF:
2992 case Op_DivHF:
2993 case Op_FmaHF:
2994 case Op_MulHF:
2995 case Op_ReinterpretS2HF:
2996 case Op_ReinterpretHF2S:
2997 case Op_SubHF:
2998 case Op_SqrtHF:
2999 if (!VM_Version::supports_avx512_fp16()) {
3000 return false;
3001 }
3002 break;
3003 case Op_VectorLoadShuffle:
3004 case Op_VectorRearrange:
3005 case Op_MulReductionVI:
3006 if (UseSSE < 4) { // requires at least SSE4
3007 return false;
3008 }
3009 break;
3010 case Op_IsInfiniteF:
3011 case Op_IsInfiniteD:
3012 if (!VM_Version::supports_avx512dq()) {
3013 return false;
3014 }
3015 break;
3016 case Op_SqrtVD:
3017 case Op_SqrtVF:
3018 case Op_VectorMaskCmp:
3019 case Op_VectorCastB2X:
3020 case Op_VectorCastS2X:
3021 case Op_VectorCastI2X:
3022 case Op_VectorCastL2X:
3023 case Op_VectorCastF2X:
3024 case Op_VectorCastD2X:
3025 case Op_VectorUCastB2X:
3026 case Op_VectorUCastS2X:
3027 case Op_VectorUCastI2X:
3028 case Op_VectorMaskCast:
3029 if (UseAVX < 1) { // enabled for AVX only
3030 return false;
3031 }
3032 break;
3033 case Op_PopulateIndex:
3034 if (UseAVX < 2) {
3035 return false;
3036 }
3037 break;
3038 case Op_RoundVF:
3039 if (UseAVX < 2) { // enabled for AVX2 only
3040 return false;
3041 }
3042 break;
3043 case Op_RoundVD:
3044 if (UseAVX < 3) {
3045 return false; // enabled for AVX3 only
3046 }
3047 break;
3048 case Op_CompareAndSwapL:
3049 case Op_CompareAndSwapP:
3050 break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
3061 case Op_OnSpinWait:
3062 if (VM_Version::supports_on_spin_wait() == false) {
3063 return false;
3064 }
3065 break;
3066 case Op_MulVB:
3067 case Op_LShiftVB:
3068 case Op_RShiftVB:
3069 case Op_URShiftVB:
3070 case Op_VectorInsert:
3071 case Op_VectorLoadMask:
3072 case Op_VectorStoreMask:
3073 case Op_VectorBlend:
3074 if (UseSSE < 4) {
3075 return false;
3076 }
3077 break;
3078 case Op_MaxD:
3079 case Op_MaxF:
3080 case Op_MinD:
3081 case Op_MinF:
3082 if (UseAVX < 1) { // enabled for AVX only
3083 return false;
3084 }
3085 break;
3086 case Op_CacheWB:
3087 case Op_CacheWBPreSync:
3088 case Op_CacheWBPostSync:
3089 if (!VM_Version::supports_data_cache_line_flush()) {
3090 return false;
3091 }
3092 break;
3093 case Op_ExtractB:
3094 case Op_ExtractL:
3095 case Op_ExtractI:
3096 case Op_RoundDoubleMode:
3097 if (UseSSE < 4) {
3098 return false;
3099 }
3100 break;
3101 case Op_RoundDoubleModeV:
3102 if (VM_Version::supports_avx() == false) {
3103 return false; // 128bit vroundpd is not available
3104 }
3105 break;
3106 case Op_LoadVectorGather:
3107 case Op_LoadVectorGatherMasked:
3108 if (UseAVX < 2) {
3109 return false;
3110 }
3111 break;
3112 case Op_FmaF:
3113 case Op_FmaD:
3114 case Op_FmaVD:
3115 case Op_FmaVF:
3116 if (!UseFMA) {
3117 return false;
3118 }
3119 break;
3120 case Op_MacroLogicV:
3121 if (UseAVX < 3 || !UseVectorMacroLogic) {
3122 return false;
3123 }
3124 break;
3125
3126 case Op_VectorCmpMasked:
3127 case Op_VectorMaskGen:
3128 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3129 return false;
3130 }
3131 break;
3132 case Op_VectorMaskFirstTrue:
3133 case Op_VectorMaskLastTrue:
3134 case Op_VectorMaskTrueCount:
3135 case Op_VectorMaskToLong:
3136 if (UseAVX < 1) {
3137 return false;
3138 }
3139 break;
3140 case Op_RoundF:
3141 case Op_RoundD:
3142 break;
3143 case Op_CopySignD:
3144 case Op_CopySignF:
3145 if (UseAVX < 3) {
3146 return false;
3147 }
3148 if (!VM_Version::supports_avx512vl()) {
3149 return false;
3150 }
3151 break;
3152 case Op_CompressBits:
3153 case Op_ExpandBits:
3154 if (!VM_Version::supports_bmi2()) {
3155 return false;
3156 }
3157 break;
3158 case Op_CompressM:
3159 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3160 return false;
3161 }
3162 break;
3163 case Op_ConvF2HF:
3164 case Op_ConvHF2F:
3165 if (!VM_Version::supports_float16()) {
3166 return false;
3167 }
3168 break;
3169 case Op_VectorCastF2HF:
3170 case Op_VectorCastHF2F:
3171 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3172 return false;
3173 }
3174 break;
3175 }
3176 return true; // Match rules are supported by default.
3177 }
3178
3179 //------------------------------------------------------------------------
3180
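// Keep this in sync with is_vector_popcount_predicate in the source_hpp block
// above; both encode the same AVX-512 popcount instruction requirements.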
3181 static inline bool is_pop_count_instr_target(BasicType bt) {
3182 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3183 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3184 }
3185
3186 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3187 return match_rule_supported_vector(opcode, vlen, bt);
3188 }
3189
3190 // Identify extra cases that we might want to provide match rules for vector nodes and
3191 // other intrinsics guarded with vector length (vlen) and element type (bt).
3192 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3193 if (!match_rule_supported(opcode)) {
3194 return false;
3195 }
3196 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3197 // * SSE2 supports 128bit vectors for all types;
3198 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3199 // * AVX2 supports 256bit vectors for all types;
3200 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3201 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3202 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3203 // And MaxVectorSize is taken into account as well.
3204 if (!vector_size_supported(bt, vlen)) {
3205 return false;
3206 }
3207 // Special cases which require vector length follow:
3208 // * implementation limitations
3209 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3210 // * 128bit vroundpd instruction is present only in AVX1
3211 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3212 switch (opcode) {
3213 case Op_MaxVHF:
3214 case Op_MinVHF:
3215 if (!VM_Version::supports_avx512bw()) {
3216 return false;
3217 }
3218 case Op_AddVHF:
3219 case Op_DivVHF:
3220 case Op_FmaVHF:
3221 case Op_MulVHF:
3222 case Op_SubVHF:
3223 case Op_SqrtVHF:
3224 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3225 return false;
3226 }
3227 if (!VM_Version::supports_avx512_fp16()) {
3228 return false;
3229 }
3230 break;
3231 case Op_AbsVF:
3232 case Op_NegVF:
3233 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3234 return false; // 512bit vandps and vxorps are not available
3235 }
3236 break;
3237 case Op_AbsVD:
3238 case Op_NegVD:
3239 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3240 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3241 }
3242 break;
3243 case Op_RotateRightV:
3244 case Op_RotateLeftV:
3245 if (bt != T_INT && bt != T_LONG) {
3246 return false;
3247 } // fallthrough
3248 case Op_MacroLogicV:
3249 if (!VM_Version::supports_evex() ||
3250 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3251 return false;
3252 }
3253 break;
3254 case Op_ClearArray:
3255 case Op_VectorMaskGen:
3256 case Op_VectorCmpMasked:
3257 if (!VM_Version::supports_avx512bw()) {
3258 return false;
3259 }
3260 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3261 return false;
3262 }
3263 break;
3264 case Op_LoadVectorMasked:
3265 case Op_StoreVectorMasked:
3266 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3267 return false;
3268 }
3269 break;
3270 case Op_UMinV:
3271 case Op_UMaxV:
3272 if (UseAVX == 0) {
3273 return false;
3274 }
3275 break;
3276 case Op_MaxV:
3277 case Op_MinV:
3278 if (UseSSE < 4 && is_integral_type(bt)) {
3279 return false;
3280 }
3281 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3282 // Float/Double intrinsics are enabled for AVX family currently.
3283 if (UseAVX == 0) {
3284 return false;
3285 }
3286 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3287 return false;
3288 }
3289 }
3290 break;
3291 case Op_CallLeafVector:
3292 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3293 return false;
3294 }
3295 break;
3296 case Op_AddReductionVI:
3297 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3298 return false;
3299 }
3300 // fallthrough
3301 case Op_AndReductionV:
3302 case Op_OrReductionV:
3303 case Op_XorReductionV:
3304 if (is_subword_type(bt) && (UseSSE < 4)) {
3305 return false;
3306 }
3307 break;
3308 case Op_MinReductionV:
3309 case Op_MaxReductionV:
3310 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3311 return false;
3312 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3313 return false;
3314 }
3315 // Float/Double intrinsics enabled for AVX family.
3316 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3317 return false;
3318 }
3319 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3320 return false;
3321 }
3322 break;
3323 case Op_VectorTest:
3324 if (UseSSE < 4) {
3325 return false; // Implementation limitation
3326 } else if (size_in_bits < 32) {
3327 return false; // Implementation limitation
3328 }
3329 break;
3330 case Op_VectorLoadShuffle:
3331 case Op_VectorRearrange:
      if (vlen == 2) {
3333 return false; // Implementation limitation due to how shuffle is loaded
3334 } else if (size_in_bits == 256 && UseAVX < 2) {
3335 return false; // Implementation limitation
3336 }
3337 break;
3338 case Op_VectorLoadMask:
3339 case Op_VectorMaskCast:
3340 if (size_in_bits == 256 && UseAVX < 2) {
3341 return false; // Implementation limitation
3342 }
3343 // fallthrough
3344 case Op_VectorStoreMask:
3345 if (vlen == 2) {
3346 return false; // Implementation limitation
3347 }
3348 break;
3349 case Op_PopulateIndex:
3350 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3351 return false;
3352 }
3353 break;
3354 case Op_VectorCastB2X:
3355 case Op_VectorCastS2X:
3356 case Op_VectorCastI2X:
3357 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3358 return false;
3359 }
3360 break;
3361 case Op_VectorCastL2X:
3362 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3363 return false;
3364 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3365 return false;
3366 }
3367 break;
3368 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3372 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3373 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3374 return false;
3375 }
3376 }
3377 // fallthrough
3378 case Op_VectorCastD2X:
3379 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3380 return false;
3381 }
3382 break;
3383 case Op_VectorCastF2HF:
3384 case Op_VectorCastHF2F:
3385 if (!VM_Version::supports_f16c() &&
3386 ((!VM_Version::supports_evex() ||
3387 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3388 return false;
3389 }
3390 break;
3391 case Op_RoundVD:
3392 if (!VM_Version::supports_avx512dq()) {
3393 return false;
3394 }
3395 break;
3396 case Op_MulReductionVI:
3397 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3398 return false;
3399 }
3400 break;
3401 case Op_LoadVectorGatherMasked:
3402 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3403 return false;
3404 }
3405 if (is_subword_type(bt) &&
3406 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3407 (size_in_bits < 64) ||
3408 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3409 return false;
3410 }
3411 break;
3412 case Op_StoreVectorScatterMasked:
3413 case Op_StoreVectorScatter:
3414 if (is_subword_type(bt)) {
3415 return false;
3416 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3417 return false;
3418 }
3419 // fallthrough
3420 case Op_LoadVectorGather:
3421 if (!is_subword_type(bt) && size_in_bits == 64) {
3422 return false;
3423 }
3424 if (is_subword_type(bt) && size_in_bits < 64) {
3425 return false;
3426 }
3427 break;
3428 case Op_SaturatingAddV:
3429 case Op_SaturatingSubV:
3430 if (UseAVX < 1) {
3431 return false; // Implementation limitation
3432 }
3433 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3434 return false;
3435 }
3436 break;
3437 case Op_SelectFromTwoVector:
3438 if (size_in_bits < 128) {
3439 return false;
3440 }
3441 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3442 return false;
3443 }
3444 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3445 return false;
3446 }
3447 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3448 return false;
3449 }
3450 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3451 return false;
3452 }
3453 break;
3454 case Op_MaskAll:
3455 if (!VM_Version::supports_evex()) {
3456 return false;
3457 }
3458 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3459 return false;
3460 }
3461 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3462 return false;
3463 }
3464 break;
3465 case Op_VectorMaskCmp:
3466 if (vlen < 2 || size_in_bits < 32) {
3467 return false;
3468 }
3469 break;
3470 case Op_CompressM:
3471 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3472 return false;
3473 }
3474 break;
3475 case Op_CompressV:
3476 case Op_ExpandV:
3477 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3478 return false;
3479 }
      if (size_in_bits < 128) {
        return false;
      } // fallthrough
3483 case Op_VectorLongToMask:
3484 if (UseAVX < 1) {
3485 return false;
3486 }
3487 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3488 return false;
3489 }
3490 break;
3491 case Op_SignumVD:
3492 case Op_SignumVF:
3493 if (UseAVX < 1) {
3494 return false;
3495 }
3496 break;
3497 case Op_PopCountVI:
3498 case Op_PopCountVL: {
3499 if (!is_pop_count_instr_target(bt) &&
3500 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3501 return false;
3502 }
3503 }
3504 break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
3517 }
3518 return true; // Per default match rules are supported.
3519 }
3520
3521 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator. This routine does a strict check for
  // the existence of masked operation patterns: it returns false by default
  // for all opcodes apart from the ones whose masked instruction patterns are
  // defined in this file.
3528 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3529 return false;
3530 }
3531
3532 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3533 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
  switch (opcode) {
3537 // Unary masked operations
3538 case Op_AbsVB:
3539 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      } // fallthrough
3543 case Op_AbsVI:
3544 case Op_AbsVL:
3545 return true;
3546
3547 // Ternary masked operations
3548 case Op_FmaVF:
3549 case Op_FmaVD:
3550 return true;
3551
3552 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3554 return false;
3555 }
3556 return true;
3557
3558 // Binary masked operations
3559 case Op_AddVB:
3560 case Op_AddVS:
3561 case Op_SubVB:
3562 case Op_SubVS:
3563 case Op_MulVS:
3564 case Op_LShiftVS:
3565 case Op_RShiftVS:
3566 case Op_URShiftVS:
3567 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3568 if (!VM_Version::supports_avx512bw()) {
3569 return false; // Implementation limitation
3570 }
3571 return true;
3572
3573 case Op_MulVL:
3574 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3575 if (!VM_Version::supports_avx512dq()) {
3576 return false; // Implementation limitation
3577 }
3578 return true;
3579
3580 case Op_AndV:
3581 case Op_OrV:
3582 case Op_XorV:
3583 case Op_RotateRightV:
3584 case Op_RotateLeftV:
3585 if (bt != T_INT && bt != T_LONG) {
3586 return false; // Implementation limitation
3587 }
3588 return true;
3589
3590 case Op_VectorLoadMask:
3591 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3592 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3593 return false;
3594 }
3595 return true;
3596
3597 case Op_AddVI:
3598 case Op_AddVL:
3599 case Op_AddVF:
3600 case Op_AddVD:
3601 case Op_SubVI:
3602 case Op_SubVL:
3603 case Op_SubVF:
3604 case Op_SubVD:
3605 case Op_MulVI:
3606 case Op_MulVF:
3607 case Op_MulVD:
3608 case Op_DivVF:
3609 case Op_DivVD:
3610 case Op_SqrtVF:
3611 case Op_SqrtVD:
3612 case Op_LShiftVI:
3613 case Op_LShiftVL:
3614 case Op_RShiftVI:
3615 case Op_RShiftVL:
3616 case Op_URShiftVI:
3617 case Op_URShiftVL:
3618 case Op_LoadVectorMasked:
3619 case Op_StoreVectorMasked:
3620 case Op_LoadVectorGatherMasked:
3621 case Op_StoreVectorScatterMasked:
3622 return true;
3623
3624 case Op_UMinV:
3625 case Op_UMaxV:
3626 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3627 return false;
3628 } // fallthrough
3629 case Op_MaxV:
3630 case Op_MinV:
3631 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3632 return false; // Implementation limitation
3633 }
3634 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3635 return false; // Implementation limitation
3636 }
3637 return true;
3638 case Op_SaturatingAddV:
3639 case Op_SaturatingSubV:
3640 if (!is_subword_type(bt)) {
3641 return false;
3642 }
3643 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3644 return false; // Implementation limitation
3645 }
3646 return true;
3647
3648 case Op_VectorMaskCmp:
3649 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3650 return false; // Implementation limitation
3651 }
3652 return true;
3653
3654 case Op_VectorRearrange:
3655 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3656 return false; // Implementation limitation
3657 }
3658 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3659 return false; // Implementation limitation
3660 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3661 return false; // Implementation limitation
3662 }
3663 return true;
3664
3665 // Binary Logical operations
3666 case Op_AndVMask:
3667 case Op_OrVMask:
3668 case Op_XorVMask:
3669 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3670 return false; // Implementation limitation
3671 }
3672 return true;
3673
3674 case Op_PopCountVI:
3675 case Op_PopCountVL:
3676 if (!is_pop_count_instr_target(bt)) {
3677 return false;
3678 }
3679 return true;
3680
3681 case Op_MaskAll:
3682 return true;
3683
3684 case Op_CountLeadingZerosV:
3685 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3686 return true;
3687 }
3688 default:
3689 return false;
3690 }
3691 }
3692
3693 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3694 return false;
3695 }
3696
3697 // Return true if Vector::rearrange needs preparation of the shuffle argument
3698 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3699 switch (elem_bt) {
3700 case T_BYTE: return false;
3701 case T_SHORT: return !VM_Version::supports_avx512bw();
3702 case T_INT: return !VM_Version::supports_avx();
3703 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3704 default:
3705 ShouldNotReachHere();
3706 return false;
3707 }
3708 }
3709
3710 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3711 // Prefer predicate if the mask type is "TypeVectMask".
3712 return vt->isa_vectmask() != nullptr;
3713 }
3714
3715 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3716 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3717 bool legacy = (generic_opnd->opcode() == LEGVEC);
3718 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3719 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3720 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3721 return new legVecZOper();
3722 }
3723 if (legacy) {
3724 switch (ideal_reg) {
3725 case Op_VecS: return new legVecSOper();
3726 case Op_VecD: return new legVecDOper();
3727 case Op_VecX: return new legVecXOper();
3728 case Op_VecY: return new legVecYOper();
3729 case Op_VecZ: return new legVecZOper();
3730 }
3731 } else {
3732 switch (ideal_reg) {
3733 case Op_VecS: return new vecSOper();
3734 case Op_VecD: return new vecDOper();
3735 case Op_VecX: return new vecXOper();
3736 case Op_VecY: return new vecYOper();
3737 case Op_VecZ: return new vecZOper();
3738 }
3739 }
3740 ShouldNotReachHere();
3741 return nullptr;
3742 }
3743
3744 bool Matcher::is_reg2reg_move(MachNode* m) {
3745 switch (m->rule()) {
3746 case MoveVec2Leg_rule:
3747 case MoveLeg2Vec_rule:
3748 case MoveF2VL_rule:
3749 case MoveF2LEG_rule:
3750 case MoveVL2F_rule:
3751 case MoveLEG2F_rule:
3752 case MoveD2VL_rule:
3753 case MoveD2LEG_rule:
3754 case MoveVL2D_rule:
3755 case MoveLEG2D_rule:
3756 return true;
3757 default:
3758 return false;
3759 }
3760 }
3761
3762 bool Matcher::is_generic_vector(MachOper* opnd) {
3763 switch (opnd->opcode()) {
3764 case VEC:
3765 case LEGVEC:
3766 return true;
3767 default:
3768 return false;
3769 }
3770 }
3771
3772 //------------------------------------------------------------------------
3773
3774 const RegMask* Matcher::predicate_reg_mask(void) {
3775 return &_VECTMASK_REG_mask;
3776 }
3777
3778 // Max vector size in bytes. 0 if not supported.
3779 int Matcher::vector_width_in_bytes(BasicType bt) {
3780 assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
3784 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3785 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3786 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3787 size = (UseAVX > 2) ? 64 : 32;
3788 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3789 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use the MaxVectorSize flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
3792 // Minimum 2 values in vector (or 4 for bytes).
3793 switch (bt) {
3794 case T_DOUBLE:
3795 case T_LONG:
3796 if (size < 16) return 0;
3797 break;
3798 case T_FLOAT:
3799 case T_INT:
3800 if (size < 8) return 0;
3801 break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
      if (size < 4) return 0;
      break;
3814 default:
3815 ShouldNotReachHere();
3816 }
3817 return size;
3818 }
3819
3820 // Limits on vector size (number of elements) loaded into vector.
3821 int Matcher::max_vector_size(const BasicType bt) {
3822 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3823 }
3824 int Matcher::min_vector_size(const BasicType bt) {
3825 int max_size = max_vector_size(bt);
  // The minimum size that can be loaded into a vector is 4 bytes,
  // i.e. 4 elements for byte vectors and 2 elements otherwise.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Also support single-element double vectors, for SVML calls.
  if (bt == T_DOUBLE) {
    size = 1;
  }
3832 return MIN2(size,max_size);
3833 }
3834
3835 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3836 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3837 // by default on Cascade Lake
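  // (e.g. for T_INT this caps auto-vectorization at 32 / 4 = 8 lanes even
  // when 512-bit vectors are available).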
3838 if (VM_Version::is_default_intel_cascade_lake()) {
3839 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3840 }
3841 return Matcher::max_vector_size(bt);
3842 }
3843
3844 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3845 return -1;
3846 }
3847
3848 // Vector ideal reg corresponding to specified size in bytes
3849 uint Matcher::vector_ideal_reg(int size) {
3850 assert(MaxVectorSize >= size, "");
3851 switch(size) {
3852 case 4: return Op_VecS;
3853 case 8: return Op_VecD;
3854 case 16: return Op_VecX;
3855 case 32: return Op_VecY;
3856 case 64: return Op_VecZ;
3857 }
3858 ShouldNotReachHere();
3859 return 0;
3860 }
3861
3862 // Check for shift by small constant as well
3863 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3864 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3865 shift->in(2)->get_int() <= 3 &&
3866 // Are there other uses besides address expressions?
3867 !matcher->is_visited(shift)) {
3868 address_visited.set(shift->_idx); // Flag as address_visited
3869 mstack.push(shift->in(2), Matcher::Visit);
3870 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
3874 if (conv->Opcode() == Op_ConvI2L &&
3875 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3876 // Are there other uses besides address expressions?
3877 !matcher->is_visited(conv)) {
3878 address_visited.set(conv->_idx); // Flag as address_visited
3879 mstack.push(conv->in(1), Matcher::Pre_Visit);
3880 } else {
3881 mstack.push(conv, Matcher::Pre_Visit);
3882 }
3883 return true;
3884 }
3885 return false;
3886 }
3887
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3892 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3893 // refers to the same node.
3894 //
3895 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3896 // This is a temporary solution until we make DAGs expressible in ADL.
3897 template<typename ConType>
3898 class FusedPatternMatcher {
3899 Node* _op1_node;
3900 Node* _mop_node;
3901 int _con_op;
3902
3903 static int match_next(Node* n, int next_op, int next_op_idx) {
3904 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3905 return -1;
3906 }
3907
3908 if (next_op_idx == -1) { // n is commutative, try rotations
3909 if (n->in(1)->Opcode() == next_op) {
3910 return 1;
3911 } else if (n->in(2)->Opcode() == next_op) {
3912 return 2;
3913 }
3914 } else {
3915 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3916 if (n->in(next_op_idx)->Opcode() == next_op) {
3917 return next_op_idx;
3918 }
3919 }
3920 return -1;
3921 }
3922
3923 public:
3924 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3925 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3926
3927 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3928 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3929 typename ConType::NativeType con_value) {
3930 if (_op1_node->Opcode() != op1) {
3931 return false;
3932 }
3933 if (_mop_node->outcnt() > 2) {
3934 return false;
3935 }
3936 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3937 if (op1_op2_idx == -1) {
3938 return false;
3939 }
3940 // Memory operation must be the other edge
3941 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3942
3943 // Check that the mop node is really what we want
3944 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3945 Node* op2_node = _op1_node->in(op1_op2_idx);
3946 if (op2_node->outcnt() > 1) {
3947 return false;
3948 }
3949 assert(op2_node->Opcode() == op2, "Should be");
3950 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3951 if (op2_con_idx == -1) {
3952 return false;
3953 }
3954 // Memory operation must be the other edge
3955 int op2_mop_idx = (op2_con_idx & 1) + 1;
3956 // Check that the memory operation is the same node
3957 if (op2_node->in(op2_mop_idx) == _mop_node) {
3958 // Now check the constant
3959 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
3960 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
3961 return true;
3962 }
3963 }
3964 }
3965 return false;
3966 }
3967 };
3968
3969 static bool is_bmi_pattern(Node* n, Node* m) {
3970 assert(UseBMI1Instructions, "sanity");
3971 if (n != nullptr && m != nullptr) {
3972 if (m->Opcode() == Op_LoadI) {
3973 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI, 1, 0)   || // blsi: x & -x
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || // blsr: x & (x - 1)
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);   // blsmsk: x ^ (x - 1)
3977 } else if (m->Opcode() == Op_LoadL) {
3978 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL, 1, 0)   || // blsi: x & -x
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || // blsr: x & (x - 1)
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);   // blsmsk: x ^ (x - 1)
3982 }
3983 }
3984 return false;
3985 }
3986
3987 // Should the matcher clone input 'm' of node 'n'?
3988 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
3989 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
3990 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
3991 mstack.push(m, Visit);
3992 return true;
3993 }
3994 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
3995 mstack.push(m, Visit); // m = ShiftCntV
3996 return true;
3997 }
3998 if (is_encode_and_store_pattern(n, m)) {
3999 mstack.push(m, Visit);
4000 return true;
4001 }
4002 return false;
4003 }
4004
4005 // Should the Matcher clone shifts on addressing modes, expecting them
4006 // to be subsumed into complex addressing expressions or compute them
4007 // into registers?
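// x86 addressing modes can subsume a scaled index and a 32-bit displacement:
// an address like base + (idx << 3) + disp can match a single
// [base + idx*8 + disp] operand, so cloning the shift into each use is
// usually profitable here.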
4008 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4009 Node *off = m->in(AddPNode::Offset);
4010 if (off->is_Con()) {
4011 address_visited.test_set(m->_idx); // Flag as address_visited
4012 Node *adr = m->in(AddPNode::Address);
4013
    // Intel can handle 2 adds in an addressing mode, with one of them using
    // an immediate offset. AtomicAdd is not an addressing expression;
    // it is cheap to rule out by looking for its screwy (top) base.
4017 if (adr->is_AddP() &&
4018 !adr->in(AddPNode::Base)->is_top() &&
4019 !adr->in(AddPNode::Offset)->is_Con() &&
4020 off->get_long() == (int) (off->get_long()) && // immL32
4021 // Are there other uses besides address expressions?
4022 !is_visited(adr)) {
4023 address_visited.set(adr->_idx); // Flag as address_visited
4024 Node *shift = adr->in(AddPNode::Offset);
4025 if (!clone_shift(shift, this, mstack, address_visited)) {
4026 mstack.push(shift, Pre_Visit);
4027 }
4028 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4029 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4030 } else {
4031 mstack.push(adr, Pre_Visit);
4032 }
4033
4034 // Clone X+offset as it also folds into most addressing expressions
4035 mstack.push(off, Visit);
4036 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4037 return true;
4038 } else if (clone_shift(off, this, mstack, address_visited)) {
4039 address_visited.test_set(m->_idx); // Flag as address_visited
4040 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4041 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4042 return true;
4043 }
4044 return false;
4045 }
4046
4047 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4048 switch (bt) {
4049 case BoolTest::eq:
4050 return Assembler::eq;
4051 case BoolTest::ne:
4052 return Assembler::neq;
4053 case BoolTest::le:
4054 case BoolTest::ule:
4055 return Assembler::le;
4056 case BoolTest::ge:
4057 case BoolTest::uge:
4058 return Assembler::nlt;
4059 case BoolTest::lt:
4060 case BoolTest::ult:
4061 return Assembler::lt;
4062 case BoolTest::gt:
4063 case BoolTest::ugt:
4064 return Assembler::nle;
4065 default : ShouldNotReachHere(); return Assembler::_false;
4066 }
4067 }
4068
4069 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4070 switch (bt) {
4071 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4072 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4073 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4074 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4075 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4076 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4077 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4078 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4079 }
4080 }
4081
4082 // Helper methods for MachSpillCopyNode::implementation().
4083 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4084 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4085 assert(ireg == Op_VecS || // 32bit vector
4086 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4087 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4088 "no non-adjacent vector moves" );
4089 if (masm) {
4090 switch (ireg) {
4091 case Op_VecS: // copy whole register
4092 case Op_VecD:
4093 case Op_VecX:
4094 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4095 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4096 } else {
4097 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4098 }
4099 break;
4100 case Op_VecY:
4101 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4102 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4103 } else {
4104 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4105 }
4106 break;
4107 case Op_VecZ:
4108 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4109 break;
4110 default:
4111 ShouldNotReachHere();
4112 }
4113 #ifndef PRODUCT
4114 } else {
4115 switch (ireg) {
4116 case Op_VecS:
4117 case Op_VecD:
4118 case Op_VecX:
4119 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4120 break;
4121 case Op_VecY:
4122 case Op_VecZ:
4123 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4124 break;
4125 default:
4126 ShouldNotReachHere();
4127 }
4128 #endif
4129 }
4130 }
4131
4132 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4133 int stack_offset, int reg, uint ireg, outputStream* st) {
4134 if (masm) {
4135 if (is_load) {
4136 switch (ireg) {
4137 case Op_VecS:
4138 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4139 break;
4140 case Op_VecD:
4141 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4142 break;
4143 case Op_VecX:
4144 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4145 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4146 } else {
4147 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4148 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4149 }
4150 break;
4151 case Op_VecY:
4152 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4153 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4154 } else {
4155 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4156 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4157 }
4158 break;
4159 case Op_VecZ:
4160 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4161 break;
4162 default:
4163 ShouldNotReachHere();
4164 }
4165 } else { // store
4166 switch (ireg) {
4167 case Op_VecS:
4168 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4169 break;
4170 case Op_VecD:
4171 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4172 break;
4173 case Op_VecX:
4174 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4175 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4176 }
4177 else {
4178 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4179 }
4180 break;
4181 case Op_VecY:
4182 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4183 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4184 }
4185 else {
4186 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4187 }
4188 break;
4189 case Op_VecZ:
4190 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4191 break;
4192 default:
4193 ShouldNotReachHere();
4194 }
4195 }
4196 #ifndef PRODUCT
4197 } else {
4198 if (is_load) {
4199 switch (ireg) {
4200 case Op_VecS:
4201 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4202 break;
4203 case Op_VecD:
4204 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4205 break;
4206 case Op_VecX:
4207 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4208 break;
4209 case Op_VecY:
4210 case Op_VecZ:
4211 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4212 break;
4213 default:
4214 ShouldNotReachHere();
4215 }
4216 } else { // store
4217 switch (ireg) {
4218 case Op_VecS:
4219 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4220 break;
4221 case Op_VecD:
4222 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4223 break;
4224 case Op_VecX:
4225 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4226 break;
4227 case Op_VecY:
4228 case Op_VecZ:
4229 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4230 break;
4231 default:
4232 ShouldNotReachHere();
4233 }
4234 }
4235 #endif
4236 }
4237 }
4238
4239 template <class T>
4240 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4241 int size = type2aelembytes(bt) * len;
4242 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4243 for (int i = 0; i < len; i++) {
4244 int offset = i * type2aelembytes(bt);
4245 switch (bt) {
4246 case T_BYTE: val->at(i) = con; break;
4247 case T_SHORT: {
4248 jshort c = con;
4249 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4250 break;
4251 }
4252 case T_INT: {
4253 jint c = con;
4254 memcpy(val->adr_at(offset), &c, sizeof(jint));
4255 break;
4256 }
4257 case T_LONG: {
4258 jlong c = con;
4259 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4260 break;
4261 }
4262 case T_FLOAT: {
4263 jfloat c = con;
4264 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4265 break;
4266 }
4267 case T_DOUBLE: {
4268 jdouble c = con;
4269 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4270 break;
4271 }
4272 default: assert(false, "%s", type2name(bt));
4273 }
4274 }
4275 return val;
4276 }
4277
4278 static inline jlong high_bit_set(BasicType bt) {
4279 switch (bt) {
4280 case T_BYTE: return 0x8080808080808080;
4281 case T_SHORT: return 0x8000800080008000;
4282 case T_INT: return 0x8000000080000000;
4283 case T_LONG: return 0x8000000000000000;
4284 default:
4285 ShouldNotReachHere();
4286 return 0;
4287 }
4288 }
4289
4290 #ifndef PRODUCT
4291 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4292 st->print("nop \t# %d bytes pad for loops and calls", _count);
4293 }
4294 #endif
4295
4296 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4297 __ nop(_count);
4298 }
4299
4300 uint MachNopNode::size(PhaseRegAlloc*) const {
4301 return _count;
4302 }
4303
4304 #ifndef PRODUCT
4305 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4306 st->print("# breakpoint");
4307 }
4308 #endif
4309
4310 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4311 __ int3();
4312 }
4313
4314 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4315 return MachNode::size(ra_);
4316 }
4317
4318 %}
4319
4320 //----------ENCODING BLOCK-----------------------------------------------------
4321 // This block specifies the encoding classes used by the compiler to
4322 // output byte streams. Encoding classes are parameterized macros
4323 // used by Machine Instruction Nodes in order to generate the bit
4324 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
4328 // which returns its register number when queried. CONST_INTER causes
4329 // an operand to generate a function which returns the value of the
4330 // constant when queried. MEMORY_INTER causes an operand to generate
4331 // four functions which return the Base Register, the Index Register,
4332 // the Scale Value, and the Offset Value of the operand when queried.
4333 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e. the encoding bits for the instruction)
4335 // associated with each basic boolean condition for a conditional
4336 // instruction.
4337 //
4338 // Instructions specify two basic values for encoding. Again, a
4339 // function is available to check if the constant displacement is an
4340 // oop. They use the ins_encode keyword to specify their encoding
4341 // classes (which must be a sequence of enc_class names, and their
4342 // parameters, specified in the encoding block), and they use the
4343 // opcode keyword to specify, in order, their primary, secondary, and
4344 // tertiary opcode. Only the opcode sections which a particular
4345 // instruction needs for encoding need to be specified.
4346 encode %{
4347 enc_class cdql_enc(no_rax_rdx_RegI div)
4348 %{
4349 // Full implementation of Java idiv and irem; checks for
4350 // special case as described in JVM spec., p.243 & p.271.
4351 //
4352 // normal case special case
4353 //
4354 // input : rax: dividend min_int
4355 // reg: divisor -1
4356 //
4357 // output: rax: quotient (= rax idiv reg) min_int
4358 // rdx: remainder (= rax irem reg) 0
4359 //
    // Code sequence:
4361 //
4362 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4363 // 5: 75 07/08 jne e <normal>
4364 // 7: 33 d2 xor %edx,%edx
4365 // [div >= 8 -> offset + 1]
4366 // [REX_B]
4367 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4368 // c: 74 03/04 je 11 <done>
4369 // 000000000000000e <normal>:
4370 // e: 99 cltd
4371 // [div >= 8 -> offset + 1]
4372 // [REX_B]
4373 // f: f7 f9 idiv $div
4374 // 0000000000000011 <done>:
4375 Label normal;
4376 Label done;
4377
4378 // cmp $0x80000000,%eax
4379 __ cmpl(as_Register(RAX_enc), 0x80000000);
4380
4381 // jne e <normal>
4382 __ jccb(Assembler::notEqual, normal);
4383
4384 // xor %edx,%edx
4385 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4386
    // cmp $0xffffffffffffffff,$div
4388 __ cmpl($div$$Register, -1);
4389
4390 // je 11 <done>
4391 __ jccb(Assembler::equal, done);
4392
4393 // <normal>
4394 // cltd
4395 __ bind(normal);
4396 __ cdql();
4397
4398 // idivl
4399 // <done>
4400 __ idivl($div$$Register);
4401 __ bind(done);
4402 %}
4403
4404 enc_class cdqq_enc(no_rax_rdx_RegL div)
4405 %{
4406 // Full implementation of Java ldiv and lrem; checks for
4407 // special case as described in JVM spec., p.243 & p.271.
4408 //
4409 // normal case special case
4410 //
4411 // input : rax: dividend min_long
4412 // reg: divisor -1
4413 //
4414 // output: rax: quotient (= rax idiv reg) min_long
4415 // rdx: remainder (= rax irem reg) 0
4416 //
    // Code sequence:
4418 //
4419 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4420 // 7: 00 00 80
4421 // a: 48 39 d0 cmp %rdx,%rax
4422 // d: 75 08 jne 17 <normal>
4423 // f: 33 d2 xor %edx,%edx
4424 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4425 // 15: 74 05 je 1c <done>
4426 // 0000000000000017 <normal>:
4427 // 17: 48 99 cqto
4428 // 19: 48 f7 f9 idiv $div
4429 // 000000000000001c <done>:
4430 Label normal;
4431 Label done;
4432
4433 // mov $0x8000000000000000,%rdx
4434 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4435
4436 // cmp %rdx,%rax
4437 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4438
4439 // jne 17 <normal>
4440 __ jccb(Assembler::notEqual, normal);
4441
4442 // xor %edx,%edx
4443 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4444
4445 // cmp $0xffffffffffffffff,$div
4446 __ cmpq($div$$Register, -1);
4447
4448 // je 1c <done>
4449 __ jccb(Assembler::equal, done);
4450
4451 // <normal>
4452 // cqto
4453 __ bind(normal);
4454 __ cdqq();
4455
4456 // idivq
4457 // <done>
4458 __ idivq($div$$Register);
4459 __ bind(done);
4460 %}
4461
4462 enc_class clear_avx %{
4463 DEBUG_ONLY(int off0 = __ offset());
4464 if (generate_vzeroupper(Compile::current())) {
4466 // Clear upper bits of YMM registers when current compiled code uses
4467 // wide vectors to avoid AVX <-> SSE transition penalty during call.
4468 __ vzeroupper();
4469 }
4470 DEBUG_ONLY(int off1 = __ offset());
4471 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4472 %}
4473
4474 enc_class Java_To_Runtime(method meth) %{
4475 __ lea(r10, RuntimeAddress((address)$meth$$method));
4476 __ call(r10);
4477 __ post_call_nop();
4478 %}
4479
4480 enc_class Java_Static_Call(method meth)
4481 %{
4482 // JAVA STATIC CALL
4483 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4484 // determine who we intended to call.
4485 if (!_method) {
4486 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4487 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4488 // The NOP here is purely to ensure that eliding a call to
4489 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4490 __ addr_nop_5();
4491 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4492 } else {
4493 int method_index = resolved_method_index(masm);
4494 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4495 : static_call_Relocation::spec(method_index);
4496 address mark = __ pc();
4497 int call_offset = __ offset();
4498 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4499 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4500 // Calls of the same statically bound method can share
4501 // a stub to the interpreter.
4502 __ code()->shared_stub_to_interp_for(_method, call_offset);
4503 } else {
4504 // Emit stubs for static call.
4505 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4506 __ clear_inst_mark();
4507 if (stub == nullptr) {
4508 ciEnv::current()->record_failure("CodeCache is full");
4509 return;
4510 }
4511 }
4512 }
4513 __ post_call_nop();
4514 %}
4515
4516 enc_class Java_Dynamic_Call(method meth) %{
4517 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4518 __ post_call_nop();
4519 %}
4520
4521 enc_class call_epilog %{
4522 if (VerifyStackAtCalls) {
4523 // Check that stack depth is unchanged: find magic cookie on stack
4524 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4525 Label L;
4526 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4527 __ jccb(Assembler::equal, L);
4528 // Die if stack mismatch
4529 __ int3();
4530 __ bind(L);
4531 }
4532 %}
4533
4534 %}
4535
4536 //----------FRAME--------------------------------------------------------------
4537 // Definition of frame structure and management information.
4538 //
4539 // S T A C K L A Y O U T Allocators stack-slot number
4540 // | (to get allocators register number
4541 // G Owned by | | v add OptoReg::stack0())
4542 // r CALLER | |
4543 // o | +--------+ pad to even-align allocators stack-slot
4544 // w V | pad0 | numbers; owned by CALLER
4545 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4546 // h ^ | in | 5
4547 // | | args | 4 Holes in incoming args owned by SELF
4548 // | | | | 3
4549 // | | +--------+
4550 // V | | old out| Empty on Intel, window on Sparc
4551 // | old |preserve| Must be even aligned.
4552 // | SP-+--------+----> Matcher::_old_SP, even aligned
4553 // | | in | 3 area for Intel ret address
4554 // Owned by |preserve| Empty on Sparc.
4555 // SELF +--------+
4556 // | | pad2 | 2 pad to align old SP
4557 // | +--------+ 1
4558 // | | locks | 0
4559 // | +--------+----> OptoReg::stack0(), even aligned
4560 // | | pad1 | 11 pad to align new SP
4561 // | +--------+
4562 // | | | 10
4563 // | | spills | 9 spills
4564 // V | | 8 (pad0 slot for callee)
4565 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4566 // ^ | out | 7
4567 // | | args | 6 Holes in outgoing args owned by CALLEE
4568 // Owned by +--------+
4569 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4570 // | new |preserve| Must be even-aligned.
4571 // | SP-+--------+----> Matcher::_new_SP, even aligned
4572 // | | |
4573 //
4574 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4575 // known from SELF's arguments and the Java calling convention.
4576 // Region 6-7 is determined per call site.
4577 // Note 2: If the calling convention leaves holes in the incoming argument
4578 // area, those holes are owned by SELF. Holes in the outgoing area
4579 // are owned by the CALLEE. Holes should not be necessary in the
4580 // incoming area, as the Java calling convention is completely under
4581 // the control of the AD file. Doubles can be sorted and packed to
4582 // avoid holes. Holes in the outgoing arguments may be necessary for
4583 // varargs C calling conventions.
4584 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4585 // even aligned with pad0 as needed.
4586 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4587 // region 6-11 is even aligned; it may be padded out more so that
4588 // the region from SP to FP meets the minimum stack alignment.
4589 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4590 // alignment. Region 11, pad1, may be dynamically extended so that
4591 // SP meets the minimum alignment.
4592
4593 frame
4594 %{
4595 // The following registers define part of the calling convention
4596 // between compiled code and the interpreter.
4597 inline_cache_reg(RAX); // Inline Cache Register
4598
4599 // Optional: name the operand used by cisc-spilling to access
4600 // [stack_pointer + offset]
4601 cisc_spilling_operand_name(indOffset32);
4602
4603 // Number of stack slots consumed by locking an object
4604 sync_stack_slots(2);
4605
4606 // Compiled code's Frame Pointer
4607 frame_pointer(RSP);
4608
4609 // The interpreter stores its frame pointer in a register which is
4610 // saved to the stack by I2CAdaptors.
4611 // I2CAdaptors convert from interpreted Java to compiled Java.
4612 interpreter_frame_pointer(RBP);
4613
4614 // Stack alignment requirement
4615 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4616
4617 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4618 // for calls to C. Supports the var-args backing area for register parms.
4619 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4620
4621 // The after-PROLOG location of the return address. Location of
4622 // return address specifies a type (REG or STACK) and a number
4623 // representing the register number (i.e. - use a register name) or
4624 // stack slot.
4625 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4626 // Otherwise, it is above the locks and verification slot and the alignment word.
4627 return_addr(STACK - 2 +
4628 align_up((Compile::current()->in_preserve_stack_slots() +
4629 Compile::current()->fixed_slots()),
4630 stack_alignment_in_slots()));
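// For example (illustrative numbers only): with 4 in-preserve slots, no
// fixed slots, and 16-byte stack alignment (4 slots), this evaluates to
// STACK - 2 + align_up(4 + 0, 4) = STACK + 2.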
4631
4632 // Location of compiled Java return values. Same as C for now.
4633 return_value
4634 %{
4635 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4636 "only return normal values");
4637
4638 static const int lo[Op_RegL + 1] = {
4639 0,
4640 0,
4641 RAX_num, // Op_RegN
4642 RAX_num, // Op_RegI
4643 RAX_num, // Op_RegP
4644 XMM0_num, // Op_RegF
4645 XMM0_num, // Op_RegD
4646 RAX_num // Op_RegL
4647 };
4648 static const int hi[Op_RegL + 1] = {
4649 0,
4650 0,
4651 OptoReg::Bad, // Op_RegN
4652 OptoReg::Bad, // Op_RegI
4653 RAX_H_num, // Op_RegP
4654 OptoReg::Bad, // Op_RegF
4655 XMM0b_num, // Op_RegD
4656 RAX_H_num // Op_RegL
4657 };
4658 // Excluded flags and vector registers.
4659 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4660 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4661 %}
4662 %}
4663
4664 //----------ATTRIBUTES---------------------------------------------------------
4665 //----------Operand Attributes-------------------------------------------------
4666 op_attrib op_cost(0); // Required cost attribute
4667
4668 //----------Instruction Attributes---------------------------------------------
4669 ins_attrib ins_cost(100); // Required cost attribute
4670 ins_attrib ins_size(8); // Required size attribute (in bits)
4671 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4672 // a non-matching short branch variant
4673 // of some long branch?
4674 ins_attrib ins_alignment(1); // Required alignment attribute (must
4675 // be a power of 2) specifies the
4676 // alignment that some part of the
4677 // instruction (not necessarily the
4678 // start) requires. If > 1, a
4679 // compute_padding() function must be
4680 // provided for the instruction
4681
4682 // Whether this node is expanded during code emission into a sequence of
4683 // instructions and the first instruction can perform an implicit null check.
4684 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4685
4686 //----------OPERANDS-----------------------------------------------------------
4687 // Operand definitions must precede instruction definitions for correct parsing
4688 // in the ADLC because operands constitute user defined types which are used in
4689 // instruction definitions.
4690
4691 //----------Simple Operands----------------------------------------------------
4692 // Immediate Operands
4693 // Integer Immediate
4694 operand immI()
4695 %{
4696 match(ConI);
4697
4698 op_cost(10);
4699 format %{ %}
4700 interface(CONST_INTER);
4701 %}
4702
4703 // Constant for test vs zero
4704 operand immI_0()
4705 %{
4706 predicate(n->get_int() == 0);
4707 match(ConI);
4708
4709 op_cost(0);
4710 format %{ %}
4711 interface(CONST_INTER);
4712 %}
4713
4714 // Constant for increment
4715 operand immI_1()
4716 %{
4717 predicate(n->get_int() == 1);
4718 match(ConI);
4719
4720 op_cost(0);
4721 format %{ %}
4722 interface(CONST_INTER);
4723 %}
4724
4725 // Constant for decrement
4726 operand immI_M1()
4727 %{
4728 predicate(n->get_int() == -1);
4729 match(ConI);
4730
4731 op_cost(0);
4732 format %{ %}
4733 interface(CONST_INTER);
4734 %}
4735
4736 operand immI_2()
4737 %{
4738 predicate(n->get_int() == 2);
4739 match(ConI);
4740
4741 op_cost(0);
4742 format %{ %}
4743 interface(CONST_INTER);
4744 %}
4745
4746 operand immI_4()
4747 %{
4748 predicate(n->get_int() == 4);
4749 match(ConI);
4750
4751 op_cost(0);
4752 format %{ %}
4753 interface(CONST_INTER);
4754 %}
4755
4756 operand immI_8()
4757 %{
4758 predicate(n->get_int() == 8);
4759 match(ConI);
4760
4761 op_cost(0);
4762 format %{ %}
4763 interface(CONST_INTER);
4764 %}
4765
4766 // Valid scale values for addressing modes (the 2-bit scale field selects index*1/2/4/8)
4767 operand immI2()
4768 %{
4769 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4770 match(ConI);
4771
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
4775
4776 operand immU7()
4777 %{
4778 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4779 match(ConI);
4780
4781 op_cost(5);
4782 format %{ %}
4783 interface(CONST_INTER);
4784 %}
4785
4786 operand immI8()
4787 %{
4788 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4789 match(ConI);
4790
4791 op_cost(5);
4792 format %{ %}
4793 interface(CONST_INTER);
4794 %}
4795
4796 operand immU8()
4797 %{
4798 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4799 match(ConI);
4800
4801 op_cost(5);
4802 format %{ %}
4803 interface(CONST_INTER);
4804 %}
4805
4806 operand immI16()
4807 %{
4808 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4809 match(ConI);
4810
4811 op_cost(10);
4812 format %{ %}
4813 interface(CONST_INTER);
4814 %}
4815
4816 // Int Immediate non-negative
4817 operand immU31()
4818 %{
4819 predicate(n->get_int() >= 0);
4820 match(ConI);
4821
4822 op_cost(0);
4823 format %{ %}
4824 interface(CONST_INTER);
4825 %}
4826
4827 // Pointer Immediate
4828 operand immP()
4829 %{
4830 match(ConP);
4831
4832 op_cost(10);
4833 format %{ %}
4834 interface(CONST_INTER);
4835 %}
4836
4837 // Null Pointer Immediate
4838 operand immP0()
4839 %{
4840 predicate(n->get_ptr() == 0);
4841 match(ConP);
4842
4843 op_cost(5);
4844 format %{ %}
4845 interface(CONST_INTER);
4846 %}
4847
4848 // Pointer Immediate
4849 operand immN() %{
4850 match(ConN);
4851
4852 op_cost(10);
4853 format %{ %}
4854 interface(CONST_INTER);
4855 %}
4856
4857 operand immNKlass() %{
4858 match(ConNKlass);
4859
4860 op_cost(10);
4861 format %{ %}
4862 interface(CONST_INTER);
4863 %}
4864
4865 // Null Pointer Immediate
4866 operand immN0() %{
4867 predicate(n->get_narrowcon() == 0);
4868 match(ConN);
4869
4870 op_cost(5);
4871 format %{ %}
4872 interface(CONST_INTER);
4873 %}
4874
4875 operand immP31()
4876 %{
4877 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4878 && (n->get_ptr() >> 31) == 0);
4879 match(ConP);
4880
4881 op_cost(5);
4882 format %{ %}
4883 interface(CONST_INTER);
4884 %}
4885
4886
4887 // Long Immediate
4888 operand immL()
4889 %{
4890 match(ConL);
4891
4892 op_cost(20);
4893 format %{ %}
4894 interface(CONST_INTER);
4895 %}
4896
4897 // Long Immediate 8-bit
4898 operand immL8()
4899 %{
4900 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4901 match(ConL);
4902
4903 op_cost(5);
4904 format %{ %}
4905 interface(CONST_INTER);
4906 %}
4907
4908 // Long Immediate 32-bit unsigned
4909 operand immUL32()
4910 %{
4911 predicate(n->get_long() == (unsigned int) (n->get_long()));
4912 match(ConL);
4913
4914 op_cost(10);
4915 format %{ %}
4916 interface(CONST_INTER);
4917 %}
4918
4919 // Long Immediate 32-bit signed
4920 operand immL32()
4921 %{
4922 predicate(n->get_long() == (int) (n->get_long()));
4923 match(ConL);
4924
4925 op_cost(15);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immL_Pow2()
4931 %{
4932 predicate(is_power_of_2((julong)n->get_long()));
4933 match(ConL);
4934
4935 op_cost(15);
4936 format %{ %}
4937 interface(CONST_INTER);
4938 %}
4939
4940 operand immL_NotPow2()
4941 %{
4942 predicate(is_power_of_2((julong)~n->get_long()));
4943 match(ConL);
4944
4945 op_cost(15);
4946 format %{ %}
4947 interface(CONST_INTER);
4948 %}
4949
4950 // Long Immediate zero
4951 operand immL0()
4952 %{
4953 predicate(n->get_long() == 0L);
4954 match(ConL);
4955
4956 op_cost(10);
4957 format %{ %}
4958 interface(CONST_INTER);
4959 %}
4960
4961 // Constant for increment
4962 operand immL1()
4963 %{
4964 predicate(n->get_long() == 1);
4965 match(ConL);
4966
4967 format %{ %}
4968 interface(CONST_INTER);
4969 %}
4970
4971 // Constant for decrement
4972 operand immL_M1()
4973 %{
4974 predicate(n->get_long() == -1);
4975 match(ConL);
4976
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate: low 32-bit mask
4982 operand immL_32bits()
4983 %{
4984 predicate(n->get_long() == 0xFFFFFFFFL);
4985 match(ConL);
4986 op_cost(20);
4987
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Int Immediate: 2^n-1, positive
4993 operand immI_Pow2M1()
4994 %{
4995 predicate((n->get_int() > 0)
4996 && is_power_of_2((juint)n->get_int() + 1));
4997 match(ConI);
4998
4999 op_cost(20);
5000 format %{ %}
5001 interface(CONST_INTER);
5002 %}
5003
5004 // Float Immediate zero
5005 operand immF0()
5006 %{
5007 predicate(jint_cast(n->getf()) == 0);
5008 match(ConF);
5009
5010 op_cost(5);
5011 format %{ %}
5012 interface(CONST_INTER);
5013 %}
5014
5015 // Float Immediate
5016 operand immF()
5017 %{
5018 match(ConF);
5019
5020 op_cost(15);
5021 format %{ %}
5022 interface(CONST_INTER);
5023 %}
5024
5025 // Half Float Immediate
5026 operand immH()
5027 %{
5028 match(ConH);
5029
5030 op_cost(15);
5031 format %{ %}
5032 interface(CONST_INTER);
5033 %}
5034
5035 // Double Immediate zero
5036 operand immD0()
5037 %{
5038 predicate(jlong_cast(n->getd()) == 0);
5039 match(ConD);
5040
5041 op_cost(5);
5042 format %{ %}
5043 interface(CONST_INTER);
5044 %}
5045
5046 // Double Immediate
5047 operand immD()
5048 %{
5049 match(ConD);
5050
5051 op_cost(15);
5052 format %{ %}
5053 interface(CONST_INTER);
5054 %}
5055
5056 // Immediates for special shifts (sign extend)
5057
5058 // Constants for shift counts
5059 operand immI_16()
5060 %{
5061 predicate(n->get_int() == 16);
5062 match(ConI);
5063
5064 format %{ %}
5065 interface(CONST_INTER);
5066 %}
5067
5068 operand immI_24()
5069 %{
5070 predicate(n->get_int() == 24);
5071 match(ConI);
5072
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Constant for byte-wide masking
5078 operand immI_255()
5079 %{
5080 predicate(n->get_int() == 255);
5081 match(ConI);
5082
5083 format %{ %}
5084 interface(CONST_INTER);
5085 %}
5086
5087 // Constant for short-wide masking
5088 operand immI_65535()
5089 %{
5090 predicate(n->get_int() == 65535);
5091 match(ConI);
5092
5093 format %{ %}
5094 interface(CONST_INTER);
5095 %}
5096
5097 // Constant for byte-wide masking
5098 operand immL_255()
5099 %{
5100 predicate(n->get_long() == 255);
5101 match(ConL);
5102
5103 format %{ %}
5104 interface(CONST_INTER);
5105 %}
5106
5107 // Constant for short-wide masking
5108 operand immL_65535()
5109 %{
5110 predicate(n->get_long() == 65535);
5111 match(ConL);
5112
5113 format %{ %}
5114 interface(CONST_INTER);
5115 %}
5116
5117 operand kReg()
5118 %{
5119 constraint(ALLOC_IN_RC(vectmask_reg));
5120 match(RegVectMask);
5121 format %{%}
5122 interface(REG_INTER);
5123 %}
5124
5125 // Register Operands
5126 // Integer Register
5127 operand rRegI()
5128 %{
5129 constraint(ALLOC_IN_RC(int_reg));
5130 match(RegI);
5131
5132 match(rax_RegI);
5133 match(rbx_RegI);
5134 match(rcx_RegI);
5135 match(rdx_RegI);
5136 match(rdi_RegI);
5137
5138 format %{ %}
5139 interface(REG_INTER);
5140 %}
5141
5142 // Special Registers
5143 operand rax_RegI()
5144 %{
5145 constraint(ALLOC_IN_RC(int_rax_reg));
5146 match(RegI);
5147 match(rRegI);
5148
5149 format %{ "RAX" %}
5150 interface(REG_INTER);
5151 %}
5152
5153 // Special Registers
5154 operand rbx_RegI()
5155 %{
5156 constraint(ALLOC_IN_RC(int_rbx_reg));
5157 match(RegI);
5158 match(rRegI);
5159
5160 format %{ "RBX" %}
5161 interface(REG_INTER);
5162 %}
5163
5164 operand rcx_RegI()
5165 %{
5166 constraint(ALLOC_IN_RC(int_rcx_reg));
5167 match(RegI);
5168 match(rRegI);
5169
5170 format %{ "RCX" %}
5171 interface(REG_INTER);
5172 %}
5173
5174 operand rdx_RegI()
5175 %{
5176 constraint(ALLOC_IN_RC(int_rdx_reg));
5177 match(RegI);
5178 match(rRegI);
5179
5180 format %{ "RDX" %}
5181 interface(REG_INTER);
5182 %}
5183
5184 operand rdi_RegI()
5185 %{
5186 constraint(ALLOC_IN_RC(int_rdi_reg));
5187 match(RegI);
5188 match(rRegI);
5189
5190 format %{ "RDI" %}
5191 interface(REG_INTER);
5192 %}
5193
5194 operand no_rax_rdx_RegI()
5195 %{
5196 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5197 match(RegI);
5198 match(rbx_RegI);
5199 match(rcx_RegI);
5200 match(rdi_RegI);
5201
5202 format %{ %}
5203 interface(REG_INTER);
5204 %}
5205
5206 operand no_rbp_r13_RegI()
5207 %{
5208 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5209 match(RegI);
5210 match(rRegI);
5211 match(rax_RegI);
5212 match(rbx_RegI);
5213 match(rcx_RegI);
5214 match(rdx_RegI);
5215 match(rdi_RegI);
5216
5217 format %{ %}
5218 interface(REG_INTER);
5219 %}
5220
5221 // Pointer Register
5222 operand any_RegP()
5223 %{
5224 constraint(ALLOC_IN_RC(any_reg));
5225 match(RegP);
5226 match(rax_RegP);
5227 match(rbx_RegP);
5228 match(rdi_RegP);
5229 match(rsi_RegP);
5230 match(rbp_RegP);
5231 match(r15_RegP);
5232 match(rRegP);
5233
5234 format %{ %}
5235 interface(REG_INTER);
5236 %}
5237
5238 operand rRegP()
5239 %{
5240 constraint(ALLOC_IN_RC(ptr_reg));
5241 match(RegP);
5242 match(rax_RegP);
5243 match(rbx_RegP);
5244 match(rdi_RegP);
5245 match(rsi_RegP);
5246 match(rbp_RegP); // See Q&A below about
5247 match(r15_RegP); // r15_RegP and rbp_RegP.
5248
5249 format %{ %}
5250 interface(REG_INTER);
5251 %}
5252
5253 operand rRegN() %{
5254 constraint(ALLOC_IN_RC(int_reg));
5255 match(RegN);
5256
5257 format %{ %}
5258 interface(REG_INTER);
5259 %}
5260
5261 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5262 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5263 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
5264 // The output of an instruction is controlled by the allocator, which respects
5265 // register class masks, not match rules. Unless an instruction mentions
5266 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5267 // by the allocator when assigning that instruction's output.
5268 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5269 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5270 // result, RBP is not included in the output of the instruction either.
5271
5272 // This operand is not allowed to use RBP even if
5273 // RBP is not used to hold the frame pointer.
5274 operand no_rbp_RegP()
5275 %{
5276 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5277 match(RegP);
5278 match(rbx_RegP);
5279 match(rsi_RegP);
5280 match(rdi_RegP);
5281
5282 format %{ %}
5283 interface(REG_INTER);
5284 %}
5285
5286 // Special Registers
5287 // Return a pointer value
5288 operand rax_RegP()
5289 %{
5290 constraint(ALLOC_IN_RC(ptr_rax_reg));
5291 match(RegP);
5292 match(rRegP);
5293
5294 format %{ %}
5295 interface(REG_INTER);
5296 %}
5297
5298 // Special Registers
5299 // Return a compressed pointer value
5300 operand rax_RegN()
5301 %{
5302 constraint(ALLOC_IN_RC(int_rax_reg));
5303 match(RegN);
5304 match(rRegN);
5305
5306 format %{ %}
5307 interface(REG_INTER);
5308 %}
5309
5310 // Used in AtomicAdd
5311 operand rbx_RegP()
5312 %{
5313 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5314 match(RegP);
5315 match(rRegP);
5316
5317 format %{ %}
5318 interface(REG_INTER);
5319 %}
5320
5321 operand rsi_RegP()
5322 %{
5323 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5324 match(RegP);
5325 match(rRegP);
5326
5327 format %{ %}
5328 interface(REG_INTER);
5329 %}
5330
5331 operand rbp_RegP()
5332 %{
5333 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5334 match(RegP);
5335 match(rRegP);
5336
5337 format %{ %}
5338 interface(REG_INTER);
5339 %}
5340
5341 // Used in rep stosq
5342 operand rdi_RegP()
5343 %{
5344 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5345 match(RegP);
5346 match(rRegP);
5347
5348 format %{ %}
5349 interface(REG_INTER);
5350 %}
5351
5352 operand r15_RegP()
5353 %{
5354 constraint(ALLOC_IN_RC(ptr_r15_reg));
5355 match(RegP);
5356 match(rRegP);
5357
5358 format %{ %}
5359 interface(REG_INTER);
5360 %}
5361
5362 operand rRegL()
5363 %{
5364 constraint(ALLOC_IN_RC(long_reg));
5365 match(RegL);
5366 match(rax_RegL);
5367 match(rdx_RegL);
5368
5369 format %{ %}
5370 interface(REG_INTER);
5371 %}
5372
5373 // Special Registers
5374 operand no_rax_rdx_RegL()
5375 %{
5376 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5377 match(RegL);
5378 match(rRegL);
5379
5380 format %{ %}
5381 interface(REG_INTER);
5382 %}
5383
5384 operand rax_RegL()
5385 %{
5386 constraint(ALLOC_IN_RC(long_rax_reg));
5387 match(RegL);
5388 match(rRegL);
5389
5390 format %{ "RAX" %}
5391 interface(REG_INTER);
5392 %}
5393
5394 operand rcx_RegL()
5395 %{
5396 constraint(ALLOC_IN_RC(long_rcx_reg));
5397 match(RegL);
5398 match(rRegL);
5399
5400 format %{ %}
5401 interface(REG_INTER);
5402 %}
5403
5404 operand rdx_RegL()
5405 %{
5406 constraint(ALLOC_IN_RC(long_rdx_reg));
5407 match(RegL);
5408 match(rRegL);
5409
5410 format %{ %}
5411 interface(REG_INTER);
5412 %}
5413
5414 operand r11_RegL()
5415 %{
5416 constraint(ALLOC_IN_RC(long_r11_reg));
5417 match(RegL);
5418 match(rRegL);
5419
5420 format %{ %}
5421 interface(REG_INTER);
5422 %}
5423
5424 operand no_rbp_r13_RegL()
5425 %{
5426 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5427 match(RegL);
5428 match(rRegL);
5429 match(rax_RegL);
5430 match(rcx_RegL);
5431 match(rdx_RegL);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 // Flags register, used as output of compare instructions
5438 operand rFlagsReg()
5439 %{
5440 constraint(ALLOC_IN_RC(int_flags));
5441 match(RegFlags);
5442
5443 format %{ "RFLAGS" %}
5444 interface(REG_INTER);
5445 %}
5446
5447 // Flags register, used as output of FLOATING POINT compare instructions
5448 operand rFlagsRegU()
5449 %{
5450 constraint(ALLOC_IN_RC(int_flags));
5451 match(RegFlags);
5452
5453 format %{ "RFLAGS_U" %}
5454 interface(REG_INTER);
5455 %}
5456
5457 operand rFlagsRegUCF() %{
5458 constraint(ALLOC_IN_RC(int_flags));
5459 match(RegFlags);
5460 predicate(false);
5461
5462 format %{ "RFLAGS_U_CF" %}
5463 interface(REG_INTER);
5464 %}
5465
5466 // Float register operands
5467 operand regF() %{
5468 constraint(ALLOC_IN_RC(float_reg));
5469 match(RegF);
5470
5471 format %{ %}
5472 interface(REG_INTER);
5473 %}
5474
5475 // Float register operands
5476 operand legRegF() %{
5477 constraint(ALLOC_IN_RC(float_reg_legacy));
5478 match(RegF);
5479
5480 format %{ %}
5481 interface(REG_INTER);
5482 %}
5483
5484 // Float register operands
5485 operand vlRegF() %{
5486 constraint(ALLOC_IN_RC(float_reg_vl));
5487 match(RegF);
5488
5489 format %{ %}
5490 interface(REG_INTER);
5491 %}
5492
5493 // Double register operands
5494 operand regD() %{
5495 constraint(ALLOC_IN_RC(double_reg));
5496 match(RegD);
5497
5498 format %{ %}
5499 interface(REG_INTER);
5500 %}
5501
5502 // Double register operands
5503 operand legRegD() %{
5504 constraint(ALLOC_IN_RC(double_reg_legacy));
5505 match(RegD);
5506
5507 format %{ %}
5508 interface(REG_INTER);
5509 %}
5510
5511 // Double register operands
5512 operand vlRegD() %{
5513 constraint(ALLOC_IN_RC(double_reg_vl));
5514 match(RegD);
5515
5516 format %{ %}
5517 interface(REG_INTER);
5518 %}
5519
5520 //----------Memory Operands----------------------------------------------------
5521 // Direct Memory Operand
5522 // operand direct(immP addr)
5523 // %{
5524 // match(addr);
5525
5526 // format %{ "[$addr]" %}
5527 // interface(MEMORY_INTER) %{
5528 // base(0xFFFFFFFF);
5529 // index(0x4);
5530 // scale(0x0);
5531 // disp($addr);
5532 // %}
5533 // %}
5534
5535 // Indirect Memory Operand
5536 operand indirect(any_RegP reg)
5537 %{
5538 constraint(ALLOC_IN_RC(ptr_reg));
5539 match(reg);
5540
5541 format %{ "[$reg]" %}
5542 interface(MEMORY_INTER) %{
5543 base($reg);
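// An index encoding of 0x4 selects RSP in the SIB byte; since RSP can
// never be an index register, 0x4 here means "no index".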
5544 index(0x4);
5545 scale(0x0);
5546 disp(0x0);
5547 %}
5548 %}
5549
5550 // Indirect Memory Plus Short Offset Operand
5551 operand indOffset8(any_RegP reg, immL8 off)
5552 %{
5553 constraint(ALLOC_IN_RC(ptr_reg));
5554 match(AddP reg off);
5555
5556 format %{ "[$reg + $off (8-bit)]" %}
5557 interface(MEMORY_INTER) %{
5558 base($reg);
5559 index(0x4);
5560 scale(0x0);
5561 disp($off);
5562 %}
5563 %}
5564
5565 // Indirect Memory Plus Long Offset Operand
5566 operand indOffset32(any_RegP reg, immL32 off)
5567 %{
5568 constraint(ALLOC_IN_RC(ptr_reg));
5569 match(AddP reg off);
5570
5571 format %{ "[$reg + $off (32-bit)]" %}
5572 interface(MEMORY_INTER) %{
5573 base($reg);
5574 index(0x4);
5575 scale(0x0);
5576 disp($off);
5577 %}
5578 %}
5579
5580 // Indirect Memory Plus Index Register Plus Offset Operand
5581 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5582 %{
5583 constraint(ALLOC_IN_RC(ptr_reg));
5584 match(AddP (AddP reg lreg) off);
5585
5586 op_cost(10);
5587 format %{"[$reg + $off + $lreg]" %}
5588 interface(MEMORY_INTER) %{
5589 base($reg);
5590 index($lreg);
5591 scale(0x0);
5592 disp($off);
5593 %}
5594 %}
5595
5596 // Indirect Memory Plus Index Register Plus Offset Operand
5597 operand indIndex(any_RegP reg, rRegL lreg)
5598 %{
5599 constraint(ALLOC_IN_RC(ptr_reg));
5600 match(AddP reg lreg);
5601
5602 op_cost(10);
5603 format %{"[$reg + $lreg]" %}
5604 interface(MEMORY_INTER) %{
5605 base($reg);
5606 index($lreg);
5607 scale(0x0);
5608 disp(0x0);
5609 %}
5610 %}
5611
5612 // Indirect Memory Times Scale Plus Index Register
5613 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5614 %{
5615 constraint(ALLOC_IN_RC(ptr_reg));
5616 match(AddP reg (LShiftL lreg scale));
5617
5618 op_cost(10);
5619 format %{"[$reg + $lreg << $scale]" %}
5620 interface(MEMORY_INTER) %{
5621 base($reg);
5622 index($lreg);
5623 scale($scale);
5624 disp(0x0);
5625 %}
5626 %}
5627
5628 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5629 %{
5630 constraint(ALLOC_IN_RC(ptr_reg));
5631 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5632 match(AddP reg (LShiftL (ConvI2L idx) scale));
5633
5634 op_cost(10);
5635 format %{"[$reg + pos $idx << $scale]" %}
5636 interface(MEMORY_INTER) %{
5637 base($reg);
5638 index($idx);
5639 scale($scale);
5640 disp(0x0);
5641 %}
5642 %}
5643
5644 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5645 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5646 %{
5647 constraint(ALLOC_IN_RC(ptr_reg));
5648 match(AddP (AddP reg (LShiftL lreg scale)) off);
5649
5650 op_cost(10);
5651 format %{"[$reg + $off + $lreg << $scale]" %}
5652 interface(MEMORY_INTER) %{
5653 base($reg);
5654 index($lreg);
5655 scale($scale);
5656 disp($off);
5657 %}
5658 %}
5659
5660 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5661 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5662 %{
5663 constraint(ALLOC_IN_RC(ptr_reg));
5664 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5665 match(AddP (AddP reg (ConvI2L idx)) off);
5666
5667 op_cost(10);
5668 format %{"[$reg + $off + $idx]" %}
5669 interface(MEMORY_INTER) %{
5670 base($reg);
5671 index($idx);
5672 scale(0x0);
5673 disp($off);
5674 %}
5675 %}
5676
5677 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5678 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5679 %{
5680 constraint(ALLOC_IN_RC(ptr_reg));
5681 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5682 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5683
5684 op_cost(10);
5685 format %{"[$reg + $off + $idx << $scale]" %}
5686 interface(MEMORY_INTER) %{
5687 base($reg);
5688 index($idx);
5689 scale($scale);
5690 disp($off);
5691 %}
5692 %}
5693
5694 // Indirect Narrow Oop Plus Offset Operand
5695 // Note: the x86 architecture doesn't support "scale * index + offset" without a
5696 // base register, so we can't free r12 even with CompressedOops::base() == nullptr.
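// (With shift == 3, the decoded address is R12 (holding the heap base) +
// (narrow_oop << 3) + off, which maps onto [R12 + $reg*8 + $off] SIB
// addressing.)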
5697 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5698 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5699 constraint(ALLOC_IN_RC(ptr_reg));
5700 match(AddP (DecodeN reg) off);
5701
5702 op_cost(10);
5703 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5704 interface(MEMORY_INTER) %{
5705 base(0xc); // R12
5706 index($reg);
5707 scale(0x3);
5708 disp($off);
5709 %}
5710 %}
5711
5712 // Indirect Memory Operand
5713 operand indirectNarrow(rRegN reg)
5714 %{
5715 predicate(CompressedOops::shift() == 0);
5716 constraint(ALLOC_IN_RC(ptr_reg));
5717 match(DecodeN reg);
5718
5719 format %{ "[$reg]" %}
5720 interface(MEMORY_INTER) %{
5721 base($reg);
5722 index(0x4);
5723 scale(0x0);
5724 disp(0x0);
5725 %}
5726 %}
5727
5728 // Indirect Memory Plus Short Offset Operand
5729 operand indOffset8Narrow(rRegN reg, immL8 off)
5730 %{
5731 predicate(CompressedOops::shift() == 0);
5732 constraint(ALLOC_IN_RC(ptr_reg));
5733 match(AddP (DecodeN reg) off);
5734
5735 format %{ "[$reg + $off (8-bit)]" %}
5736 interface(MEMORY_INTER) %{
5737 base($reg);
5738 index(0x4);
5739 scale(0x0);
5740 disp($off);
5741 %}
5742 %}
5743
5744 // Indirect Memory Plus Long Offset Operand
5745 operand indOffset32Narrow(rRegN reg, immL32 off)
5746 %{
5747 predicate(CompressedOops::shift() == 0);
5748 constraint(ALLOC_IN_RC(ptr_reg));
5749 match(AddP (DecodeN reg) off);
5750
5751 format %{ "[$reg + $off (32-bit)]" %}
5752 interface(MEMORY_INTER) %{
5753 base($reg);
5754 index(0x4);
5755 scale(0x0);
5756 disp($off);
5757 %}
5758 %}
5759
5760 // Indirect Memory Plus Index Register Plus Offset Operand
5761 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5762 %{
5763 predicate(CompressedOops::shift() == 0);
5764 constraint(ALLOC_IN_RC(ptr_reg));
5765 match(AddP (AddP (DecodeN reg) lreg) off);
5766
5767 op_cost(10);
5768 format %{"[$reg + $off + $lreg]" %}
5769 interface(MEMORY_INTER) %{
5770 base($reg);
5771 index($lreg);
5772 scale(0x0);
5773 disp($off);
5774 %}
5775 %}
5776
5777 // Indirect Memory Plus Index Register Plus Offset Operand
5778 operand indIndexNarrow(rRegN reg, rRegL lreg)
5779 %{
5780 predicate(CompressedOops::shift() == 0);
5781 constraint(ALLOC_IN_RC(ptr_reg));
5782 match(AddP (DecodeN reg) lreg);
5783
5784 op_cost(10);
5785 format %{"[$reg + $lreg]" %}
5786 interface(MEMORY_INTER) %{
5787 base($reg);
5788 index($lreg);
5789 scale(0x0);
5790 disp(0x0);
5791 %}
5792 %}
5793
5794 // Indirect Memory Times Scale Plus Index Register
5795 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5796 %{
5797 predicate(CompressedOops::shift() == 0);
5798 constraint(ALLOC_IN_RC(ptr_reg));
5799 match(AddP (DecodeN reg) (LShiftL lreg scale));
5800
5801 op_cost(10);
5802 format %{"[$reg + $lreg << $scale]" %}
5803 interface(MEMORY_INTER) %{
5804 base($reg);
5805 index($lreg);
5806 scale($scale);
5807 disp(0x0);
5808 %}
5809 %}
5810
5811 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5812 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5813 %{
5814 predicate(CompressedOops::shift() == 0);
5815 constraint(ALLOC_IN_RC(ptr_reg));
5816 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5817
5818 op_cost(10);
5819 format %{"[$reg + $off + $lreg << $scale]" %}
5820 interface(MEMORY_INTER) %{
5821 base($reg);
5822 index($lreg);
5823 scale($scale);
5824 disp($off);
5825 %}
5826 %}
5827
5828 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
5829 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5830 %{
5831 constraint(ALLOC_IN_RC(ptr_reg));
5832 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5833 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5834
5835 op_cost(10);
5836 format %{"[$reg + $off + $idx]" %}
5837 interface(MEMORY_INTER) %{
5838 base($reg);
5839 index($idx);
5840 scale(0x0);
5841 disp($off);
5842 %}
5843 %}
5844
5845 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5846 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5847 %{
5848 constraint(ALLOC_IN_RC(ptr_reg));
5849 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5850 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5851
5852 op_cost(10);
5853 format %{"[$reg + $off + $idx << $scale]" %}
5854 interface(MEMORY_INTER) %{
5855 base($reg);
5856 index($idx);
5857 scale($scale);
5858 disp($off);
5859 %}
5860 %}
5861
5862 //----------Special Memory Operands--------------------------------------------
5863 // Stack Slot Operand - This operand is used for loading and storing temporary
5864 // values on the stack where a match requires a value to
5865 // flow through memory.
5866 operand stackSlotP(sRegP reg)
5867 %{
5868 constraint(ALLOC_IN_RC(stack_slots));
5869 // No match rule because this operand is only generated in matching
5870
5871 format %{ "[$reg]" %}
5872 interface(MEMORY_INTER) %{
5873 base(0x4); // RSP
5874 index(0x4); // No Index
5875 scale(0x0); // No Scale
5876 disp($reg); // Stack Offset
5877 %}
5878 %}
5879
5880 operand stackSlotI(sRegI reg)
5881 %{
5882 constraint(ALLOC_IN_RC(stack_slots));
5883 // No match rule because this operand is only generated in matching
5884
5885 format %{ "[$reg]" %}
5886 interface(MEMORY_INTER) %{
5887 base(0x4); // RSP
5888 index(0x4); // No Index
5889 scale(0x0); // No Scale
5890 disp($reg); // Stack Offset
5891 %}
5892 %}
5893
5894 operand stackSlotF(sRegF reg)
5895 %{
5896 constraint(ALLOC_IN_RC(stack_slots));
5897 // No match rule because this operand is only generated in matching
5898
5899 format %{ "[$reg]" %}
5900 interface(MEMORY_INTER) %{
5901 base(0x4); // RSP
5902 index(0x4); // No Index
5903 scale(0x0); // No Scale
5904 disp($reg); // Stack Offset
5905 %}
5906 %}
5907
5908 operand stackSlotD(sRegD reg)
5909 %{
5910 constraint(ALLOC_IN_RC(stack_slots));
5911 // No match rule because this operand is only generated in matching
5912
5913 format %{ "[$reg]" %}
5914 interface(MEMORY_INTER) %{
5915 base(0x4); // RSP
5916 index(0x4); // No Index
5917 scale(0x0); // No Scale
5918 disp($reg); // Stack Offset
5919 %}
5920 %}
5921 operand stackSlotL(sRegL reg)
5922 %{
5923 constraint(ALLOC_IN_RC(stack_slots));
5924 // No match rule because this operand is only generated in matching
5925
5926 format %{ "[$reg]" %}
5927 interface(MEMORY_INTER) %{
5928 base(0x4); // RSP
5929 index(0x4); // No Index
5930 scale(0x0); // No Scale
5931 disp($reg); // Stack Offset
5932 %}
5933 %}
5934
5935 //----------Conditional Branch Operands----------------------------------------
5936 // Comparison Op - This is the operation of the comparison, and is limited to
5937 // the following set of codes:
5938 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5939 //
5940 // Other attributes of the comparison, such as unsignedness, are specified
5941 // by the comparison instruction that sets a condition code flags register.
5942 // That result is represented by a flags operand whose subtype is appropriate
5943 // to the unsignedness (etc.) of the comparison.
5944 //
5945 // Later, the instruction which matches both the Comparison Op (a Bool) and
5946 // the flags (produced by the Cmp) specifies the coding of the comparison op
5947 // by matching a specific subtype of Bool operand below, such as cmpOpU.
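//
// The hex values below are the standard x86 condition-code nibbles used
// by Jcc/SETcc/CMOVcc encodings: e.g. 0x4 -> JE, 0x5 -> JNE, 0xC -> JL,
// and 0x2 -> JB for the unsigned variant.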
5948
5949 // Comparison Code
5950 operand cmpOp()
5951 %{
5952 match(Bool);
5953
5954 format %{ "" %}
5955 interface(COND_INTER) %{
5956 equal(0x4, "e");
5957 not_equal(0x5, "ne");
5958 less(0xC, "l");
5959 greater_equal(0xD, "ge");
5960 less_equal(0xE, "le");
5961 greater(0xF, "g");
5962 overflow(0x0, "o");
5963 no_overflow(0x1, "no");
5964 %}
5965 %}
5966
5967 // Comparison Code, unsigned compare. Used by FP also, with
5968 // C2 (unordered) turned into GT or LT already. The other bits
5969 // C0 and C3 are turned into Carry & Zero flags.
5970 operand cmpOpU()
5971 %{
5972 match(Bool);
5973
5974 format %{ "" %}
5975 interface(COND_INTER) %{
5976 equal(0x4, "e");
5977 not_equal(0x5, "ne");
5978 less(0x2, "b");
5979 greater_equal(0x3, "ae");
5980 less_equal(0x6, "be");
5981 greater(0x7, "a");
5982 overflow(0x0, "o");
5983 no_overflow(0x1, "no");
5984 %}
5985 %}
5986
5987
5988 // Floating comparisons that don't require any fixup for the unordered case,
5989 // If both inputs of the comparison are the same, ZF is always set so we
5990 // don't need to use cmpOpUCF2 for eq/ne
5991 operand cmpOpUCF() %{
5992 match(Bool);
5993 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5994 n->as_Bool()->_test._test == BoolTest::ge ||
5995 n->as_Bool()->_test._test == BoolTest::le ||
5996 n->as_Bool()->_test._test == BoolTest::gt ||
5997 n->in(1)->in(1) == n->in(1)->in(2));
5998 format %{ "" %}
5999 interface(COND_INTER) %{
6000 equal(0xb, "np");
6001 not_equal(0xa, "p");
6002 less(0x2, "b");
6003 greater_equal(0x3, "ae");
6004 less_equal(0x6, "be");
6005 greater(0x7, "a");
6006 overflow(0x0, "o");
6007 no_overflow(0x1, "no");
6008 %}
6009 %}
6010
6011
6012 // Floating comparisons that can be fixed up with extra conditional jumps
6013 operand cmpOpUCF2() %{
6014 match(Bool);
6015 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6016 n->as_Bool()->_test._test == BoolTest::eq) &&
6017 n->in(1)->in(1) != n->in(1)->in(2));
6018 format %{ "" %}
6019 interface(COND_INTER) %{
6020 equal(0x4, "e");
6021 not_equal(0x5, "ne");
6022 less(0x2, "b");
6023 greater_equal(0x3, "ae");
6024 less_equal(0x6, "be");
6025 greater(0x7, "a");
6026 overflow(0x0, "o");
6027 no_overflow(0x1, "no");
6028 %}
6029 %}
6030
6031 // Operands for bound floating-point register arguments
6032 operand rxmm0() %{
6033 constraint(ALLOC_IN_RC(xmm0_reg));
6034 match(VecX);
6035 format%{%}
6036 interface(REG_INTER);
6037 %}
6038
6039 // Vectors
6040
6041 // Dummy generic vector class. Should be used for all vector operands.
6042 // Replaced with vec[SDXYZ] during post-selection cleanup.
6043 operand vec() %{
6044 constraint(ALLOC_IN_RC(dynamic));
6045 match(VecX);
6046 match(VecY);
6047 match(VecZ);
6048 match(VecS);
6049 match(VecD);
6050
6051 format %{ %}
6052 interface(REG_INTER);
6053 %}
6054
6055 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6056 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6057 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6058 // runtime code generation via reg_class_dynamic.
6059 operand legVec() %{
6060 constraint(ALLOC_IN_RC(dynamic));
6061 match(VecX);
6062 match(VecY);
6063 match(VecZ);
6064 match(VecS);
6065 match(VecD);
6066
6067 format %{ %}
6068 interface(REG_INTER);
6069 %}
6070
6071 // Replaces vec during post-selection cleanup. See above.
6072 operand vecS() %{
6073 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6074 match(VecS);
6075
6076 format %{ %}
6077 interface(REG_INTER);
6078 %}
6079
6080 // Replaces legVec during post-selection cleanup. See above.
6081 operand legVecS() %{
6082 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6083 match(VecS);
6084
6085 format %{ %}
6086 interface(REG_INTER);
6087 %}
6088
6089 // Replaces vec during post-selection cleanup. See above.
6090 operand vecD() %{
6091 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6092 match(VecD);
6093
6094 format %{ %}
6095 interface(REG_INTER);
6096 %}
6097
6098 // Replaces legVec during post-selection cleanup. See above.
6099 operand legVecD() %{
6100 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6101 match(VecD);
6102
6103 format %{ %}
6104 interface(REG_INTER);
6105 %}
6106
6107 // Replaces vec during post-selection cleanup. See above.
6108 operand vecX() %{
6109 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6110 match(VecX);
6111
6112 format %{ %}
6113 interface(REG_INTER);
6114 %}
6115
6116 // Replaces legVec during post-selection cleanup. See above.
6117 operand legVecX() %{
6118 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6119 match(VecX);
6120
6121 format %{ %}
6122 interface(REG_INTER);
6123 %}
6124
6125 // Replaces vec during post-selection cleanup. See above.
6126 operand vecY() %{
6127 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6128 match(VecY);
6129
6130 format %{ %}
6131 interface(REG_INTER);
6132 %}
6133
6134 // Replaces legVec during post-selection cleanup. See above.
6135 operand legVecY() %{
6136 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6137 match(VecY);
6138
6139 format %{ %}
6140 interface(REG_INTER);
6141 %}
6142
6143 // Replaces vec during post-selection cleanup. See above.
6144 operand vecZ() %{
6145 constraint(ALLOC_IN_RC(vectorz_reg));
6146 match(VecZ);
6147
6148 format %{ %}
6149 interface(REG_INTER);
6150 %}
6151
6152 // Replaces legVec during post-selection cleanup. See above.
6153 operand legVecZ() %{
6154 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6155 match(VecZ);
6156
6157 format %{ %}
6158 interface(REG_INTER);
6159 %}
6160
6161 //----------OPERAND CLASSES----------------------------------------------------
6162 // Operand Classes are groups of operands that are used to simplify
6163 // instruction definitions by not requiring the AD writer to specify separate
6164 // instructions for every form of operand when the instruction accepts
6165 // multiple operand types with the same basic encoding and format. The classic
6166 // case of this is memory operands.
6167
6168 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6169 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6170 indCompressedOopOffset,
6171 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6172 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6173 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
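// For example (an illustrative sketch in the spirit of the loads defined
// later in this file), a single instruct written against the memory
// opclass covers every addressing form listed above:
//
//   instruct loadI_example(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}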
6174
6175 //----------PIPELINE-----------------------------------------------------------
6176 // Rules which define the behavior of the target architectures pipeline.
6177 pipeline %{
6178
6179 //----------ATTRIBUTES---------------------------------------------------------
6180 attributes %{
6181 variable_size_instructions; // Variable-sized instructions
6182 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6183 instruction_unit_size = 1; // An instruction is 1 byte long
6184 instruction_fetch_unit_size = 16; // The processor fetches one line
6185 instruction_fetch_units = 1; // of 16 bytes
6186 %}
6187
6188 //----------RESOURCES----------------------------------------------------------
6189 // Resources are the functional units available to the machine
6190
6191 // Generic P2/P3 pipeline
6192 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6193 // 3 instructions decoded per cycle.
6194 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6195 // 3 ALU op, only ALU0 handles mul instructions.
6196 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6197 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6198 BR, FPU,
6199 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6200
6201 //----------PIPELINE DESCRIPTION-----------------------------------------------
6202 // Pipeline Description specifies the stages in the machine's pipeline
6203
6204 // Generic P2/P3 pipeline
6205 pipe_desc(S0, S1, S2, S3, S4, S5);
6206
6207 //----------PIPELINE CLASSES---------------------------------------------------
6208 // Pipeline Classes describe the stages in which input and output are
6209 // referenced by the hardware pipeline.
6210
6211 // Naming convention: ialu or fpu
6212 // Then: _reg
6213 // Then: _reg if there is a 2nd register
6214 // Then: _long if it's a pair of instructions implementing a long
6215 // Then: _fat if it requires the big decoder
6216 // Or: _mem if it requires the big decoder and a memory unit.
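//
// For example, ialu_reg_mem below names an integer ALU operation with a
// register destination and a memory source; per this convention it
// requires the big decoder (D0) and a memory unit (MEM).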
6217
6218 // Integer ALU reg operation
6219 pipe_class ialu_reg(rRegI dst)
6220 %{
6221 single_instruction;
6222 dst : S4(write);
6223 dst : S3(read);
6224 DECODE : S0; // any decoder
6225 ALU : S3; // any alu
6226 %}
6227
6228 // Long ALU reg operation
6229 pipe_class ialu_reg_long(rRegL dst)
6230 %{
6231 instruction_count(2);
6232 dst : S4(write);
6233 dst : S3(read);
6234 DECODE : S0(2); // any 2 decoders
6235 ALU : S3(2); // both alus
6236 %}
6237
6238 // Integer ALU reg operation using big decoder
6239 pipe_class ialu_reg_fat(rRegI dst)
6240 %{
6241 single_instruction;
6242 dst : S4(write);
6243 dst : S3(read);
6244 D0 : S0; // big decoder only
6245 ALU : S3; // any alu
6246 %}
6247
6248 // Integer ALU reg-reg operation
6249 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6250 %{
6251 single_instruction;
6252 dst : S4(write);
6253 src : S3(read);
6254 DECODE : S0; // any decoder
6255 ALU : S3; // any alu
6256 %}
6257
6258 // Integer ALU reg-reg operation
6259 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6260 %{
6261 single_instruction;
6262 dst : S4(write);
6263 src : S3(read);
6264 D0 : S0; // big decoder only
6265 ALU : S3; // any alu
6266 %}
6267
6268 // Integer ALU reg-mem operation
6269 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6270 %{
6271 single_instruction;
6272 dst : S5(write);
6273 mem : S3(read);
6274 D0 : S0; // big decoder only
6275 ALU : S4; // any alu
6276 MEM : S3; // any mem
6277 %}
6278
6279 // Integer mem operation (prefetch)
6280 pipe_class ialu_mem(memory mem)
6281 %{
6282 single_instruction;
6283 mem : S3(read);
6284 D0 : S0; // big decoder only
6285 MEM : S3; // any mem
6286 %}
6287
6288 // Integer Store to Memory
6289 pipe_class ialu_mem_reg(memory mem, rRegI src)
6290 %{
6291 single_instruction;
6292 mem : S3(read);
6293 src : S5(read);
6294 D0 : S0; // big decoder only
6295 ALU : S4; // any alu
6296 MEM : S3;
6297 %}
6298
6299 // // Long Store to Memory
6300 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6301 // %{
6302 // instruction_count(2);
6303 // mem : S3(read);
6304 // src : S5(read);
6305 // D0 : S0(2); // big decoder only; twice
6306 // ALU : S4(2); // any 2 alus
6307 // MEM : S3(2); // Both mems
6308 // %}
6309
6310 // Integer Store to Memory
6311 pipe_class ialu_mem_imm(memory mem)
6312 %{
6313 single_instruction;
6314 mem : S3(read);
6315 D0 : S0; // big decoder only
6316 ALU : S4; // any alu
6317 MEM : S3;
6318 %}
6319
6320 // Integer ALU0 reg-reg operation
6321 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6322 %{
6323 single_instruction;
6324 dst : S4(write);
6325 src : S3(read);
6326 D0 : S0; // Big decoder only
6327 ALU0 : S3; // only alu0
6328 %}
6329
6330 // Integer ALU0 reg-mem operation
6331 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6332 %{
6333 single_instruction;
6334 dst : S5(write);
6335 mem : S3(read);
6336 D0 : S0; // big decoder only
6337 ALU0 : S4; // ALU0 only
6338 MEM : S3; // any mem
6339 %}
6340
6341 // Integer ALU reg-reg operation
6342 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6343 %{
6344 single_instruction;
6345 cr : S4(write);
6346 src1 : S3(read);
6347 src2 : S3(read);
6348 DECODE : S0; // any decoder
6349 ALU : S3; // any alu
6350 %}
6351
6352 // Integer ALU reg-imm operation
6353 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6354 %{
6355 single_instruction;
6356 cr : S4(write);
6357 src1 : S3(read);
6358 DECODE : S0; // any decoder
6359 ALU : S3; // any alu
6360 %}
6361
6362 // Integer ALU reg-mem operation
6363 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6364 %{
6365 single_instruction;
6366 cr : S4(write);
6367 src1 : S3(read);
6368 src2 : S3(read);
6369 D0 : S0; // big decoder only
6370 ALU : S4; // any alu
6371 MEM : S3;
6372 %}
6373
6374 // Conditional move reg-reg
6375 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6376 %{
6377 instruction_count(4);
6378 y : S4(read);
6379 q : S3(read);
6380 p : S3(read);
6381 DECODE : S0(4); // any decoder
6382 %}
6383
6384 // Conditional move reg-reg
6385 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6386 %{
6387 single_instruction;
6388 dst : S4(write);
6389 src : S3(read);
6390 cr : S3(read);
6391 DECODE : S0; // any decoder
6392 %}
6393
6394 // Conditional move reg-mem
6395 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6396 %{
6397 single_instruction;
6398 dst : S4(write);
6399 src : S3(read);
6400 cr : S3(read);
6401 DECODE : S0; // any decoder
6402 MEM : S3;
6403 %}
6404
6405 // Conditional move reg-reg long
6406 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6407 %{
6408 single_instruction;
6409 dst : S4(write);
6410 src : S3(read);
6411 cr : S3(read);
6412 DECODE : S0(2); // any 2 decoders
6413 %}
6414
6415 // Float reg-reg operation
6416 pipe_class fpu_reg(regD dst)
6417 %{
6418 instruction_count(2);
6419 dst : S3(read);
6420 DECODE : S0(2); // any 2 decoders
6421 FPU : S3;
6422 %}
6423
6424 // Float reg-reg operation
6425 pipe_class fpu_reg_reg(regD dst, regD src)
6426 %{
6427 instruction_count(2);
6428 dst : S4(write);
6429 src : S3(read);
6430 DECODE : S0(2); // any 2 decoders
6431 FPU : S3;
6432 %}
6433
6434 // Float reg-reg operation
6435 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6436 %{
6437 instruction_count(3);
6438 dst : S4(write);
6439 src1 : S3(read);
6440 src2 : S3(read);
6441 DECODE : S0(3); // any 3 decoders
6442 FPU : S3(2);
6443 %}
6444
6445 // Float reg-reg operation
6446 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6447 %{
6448 instruction_count(4);
6449 dst : S4(write);
6450 src1 : S3(read);
6451 src2 : S3(read);
6452 src3 : S3(read);
6453 DECODE : S0(4); // any 4 decoders
6454 FPU : S3(2);
6455 %}
6456
6457 // Float reg-reg operation
6458 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6459 %{
6460 instruction_count(4);
6461 dst : S4(write);
6462 src1 : S3(read);
6463 src2 : S3(read);
6464 src3 : S3(read);
6465 DECODE : S1(3); // any 3 decoders
6466 D0 : S0; // Big decoder only
6467 FPU : S3(2);
6468 MEM : S3;
6469 %}
6470
6471 // Float reg-mem operation
6472 pipe_class fpu_reg_mem(regD dst, memory mem)
6473 %{
6474 instruction_count(2);
6475 dst : S5(write);
6476 mem : S3(read);
6477 D0 : S0; // big decoder only
6478 DECODE : S1; // any decoder for FPU POP
6479 FPU : S4;
6480 MEM : S3; // any mem
6481 %}
6482
6483 // Float reg-mem operation
6484 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6485 %{
6486 instruction_count(3);
6487 dst : S5(write);
6488 src1 : S3(read);
6489 mem : S3(read);
6490 D0 : S0; // big decoder only
6491 DECODE : S1(2); // any decoder for FPU POP
6492 FPU : S4;
6493 MEM : S3; // any mem
6494 %}
6495
6496 // Float mem-reg operation
6497 pipe_class fpu_mem_reg(memory mem, regD src)
6498 %{
6499 instruction_count(2);
6500 src : S5(read);
6501 mem : S3(read);
6502 DECODE : S0; // any decoder for FPU PUSH
6503 D0 : S1; // big decoder only
6504 FPU : S4;
6505 MEM : S3; // any mem
6506 %}
6507
6508 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6509 %{
6510 instruction_count(3);
6511 src1 : S3(read);
6512 src2 : S3(read);
6513 mem : S3(read);
6514 DECODE : S0(2); // any decoder for FPU PUSH
6515 D0 : S1; // big decoder only
6516 FPU : S4;
6517 MEM : S3; // any mem
6518 %}
6519
6520 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6521 %{
6522 instruction_count(3);
6523 src1 : S3(read);
6524 src2 : S3(read);
6525 mem : S4(read);
6526 DECODE : S0; // any decoder for FPU PUSH
6527 D0 : S0(2); // big decoder only
6528 FPU : S4;
6529 MEM : S3(2); // any mem
6530 %}
6531
6532 pipe_class fpu_mem_mem(memory dst, memory src1)
6533 %{
6534 instruction_count(2);
6535 src1 : S3(read);
6536 dst : S4(read);
6537 D0 : S0(2); // big decoder only
6538 MEM : S3(2); // any mem
6539 %}
6540
6541 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6542 %{
6543 instruction_count(3);
6544 src1 : S3(read);
6545 src2 : S3(read);
6546 dst : S4(read);
6547 D0 : S0(3); // big decoder only
6548 FPU : S4;
6549 MEM : S3(3); // any mem
6550 %}
6551
6552 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6553 %{
6554 instruction_count(3);
6555 src1 : S4(read);
6556 mem : S4(read);
6557 DECODE : S0; // any decoder for FPU PUSH
6558 D0 : S0(2); // big decoder only
6559 FPU : S4;
6560 MEM : S3(2); // any mem
6561 %}
6562
6563 // Float load constant
6564 pipe_class fpu_reg_con(regD dst)
6565 %{
6566 instruction_count(2);
6567 dst : S5(write);
6568 D0 : S0; // big decoder only for the load
6569 DECODE : S1; // any decoder for FPU POP
6570 FPU : S4;
6571 MEM : S3; // any mem
6572 %}
6573
6574 // Float load constant
6575 pipe_class fpu_reg_reg_con(regD dst, regD src)
6576 %{
6577 instruction_count(3);
6578 dst : S5(write);
6579 src : S3(read);
6580 D0 : S0; // big decoder only for the load
6581 DECODE : S1(2); // any decoder for FPU POP
6582 FPU : S4;
6583 MEM : S3; // any mem
6584 %}
6585
6586 // UnConditional branch
6587 pipe_class pipe_jmp(label labl)
6588 %{
6589 single_instruction;
6590 BR : S3;
6591 %}
6592
6593 // Conditional branch
6594 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6595 %{
6596 single_instruction;
6597 cr : S1(read);
6598 BR : S3;
6599 %}
6600
6601 // Allocation idiom
6602 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6603 %{
6604 instruction_count(1); force_serialization;
6605 fixed_latency(6);
6606 heap_ptr : S3(read);
6607 DECODE : S0(3);
6608 D0 : S2;
6609 MEM : S3;
6610 ALU : S3(2);
6611 dst : S5(write);
6612 BR : S5;
6613 %}
6614
6615 // Generic big/slow expanded idiom
6616 pipe_class pipe_slow()
6617 %{
6618 instruction_count(10); multiple_bundles; force_serialization;
6619 fixed_latency(100);
6620 D0 : S0(2);
6621 MEM : S3(2);
6622 %}
6623
6624 // The real do-nothing guy
6625 pipe_class empty()
6626 %{
6627 instruction_count(0);
6628 %}
6629
6630 // Define the class for the Nop node
6631 define
6632 %{
6633 MachNop = empty;
6634 %}
6635
6636 %}
6637
6638 //----------INSTRUCTIONS-------------------------------------------------------
6639 //
6640 // match -- States which machine-independent subtree may be replaced
6641 // by this instruction.
6642 // ins_cost -- The estimated cost of this instruction is used by instruction
6643 // selection to identify a minimum cost tree of machine
6644 // instructions that matches a tree of machine-independent
6645 // instructions.
6646 // format -- A string providing the disassembly for this instruction.
6647 // The value of an instruction's operand may be inserted
6648 // by referring to it with a '$' prefix.
6649 // opcode -- Three instruction opcodes may be provided. These are referred
6650 // to within an encode class as $primary, $secondary, and $tertiary
//              respectively. The primary opcode is commonly used to
6652 // indicate the type of machine instruction, while secondary
6653 // and tertiary are often used for prefix options or addressing
6654 // modes.
6655 // ins_encode -- A list of encode classes with parameters. The encode class
6656 // name must have been defined in an 'enc_class' specification
6657 // in the encode section of the architecture description.
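//
// As a purely illustrative sketch (commented out, so it is not part of
// the matcher; the operand and pipe-class names are borrowed from
// patterns defined elsewhere in this file), an entry ties these pieces
// together like so:
//
//   instruct exampleAddI(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src)); // ideal subtree this replaces
//     effect(KILL cr);               // addl clobbers the flags
//     ins_cost(150);                 // relative cost for selection
//     format %{ "addl $dst, $src\t# example" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}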
6658
6659 // ============================================================================
6660
6661 instruct ShouldNotReachHere() %{
6662 match(Halt);
6663 format %{ "stop\t# ShouldNotReachHere" %}
6664 ins_encode %{
6665 if (is_reachable()) {
6666 const char* str = __ code_string(_halt_reason);
6667 __ stop(str);
6668 }
6669 %}
6670 ins_pipe(pipe_slow);
6671 %}
6672
6673 // ============================================================================
6674
6675 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6676 // Load Float
6677 instruct MoveF2VL(vlRegF dst, regF src) %{
6678 match(Set dst src);
6679 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6680 ins_encode %{
6681 ShouldNotReachHere();
6682 %}
6683 ins_pipe( fpu_reg_reg );
6684 %}
6685
6686 // Load Float
6687 instruct MoveF2LEG(legRegF dst, regF src) %{
6688 match(Set dst src);
6689 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6690 ins_encode %{
6691 ShouldNotReachHere();
6692 %}
6693 ins_pipe( fpu_reg_reg );
6694 %}
6695
6696 // Load Float
6697 instruct MoveVL2F(regF dst, vlRegF src) %{
6698 match(Set dst src);
6699 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6700 ins_encode %{
6701 ShouldNotReachHere();
6702 %}
6703 ins_pipe( fpu_reg_reg );
6704 %}
6705
6706 // Load Float
6707 instruct MoveLEG2F(regF dst, legRegF src) %{
6708 match(Set dst src);
6709 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6710 ins_encode %{
6711 ShouldNotReachHere();
6712 %}
6713 ins_pipe( fpu_reg_reg );
6714 %}
6715
6716 // Load Double
6717 instruct MoveD2VL(vlRegD dst, regD src) %{
6718 match(Set dst src);
6719 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6720 ins_encode %{
6721 ShouldNotReachHere();
6722 %}
6723 ins_pipe( fpu_reg_reg );
6724 %}
6725
6726 // Load Double
6727 instruct MoveD2LEG(legRegD dst, regD src) %{
6728 match(Set dst src);
6729 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6730 ins_encode %{
6731 ShouldNotReachHere();
6732 %}
6733 ins_pipe( fpu_reg_reg );
6734 %}
6735
6736 // Load Double
6737 instruct MoveVL2D(regD dst, vlRegD src) %{
6738 match(Set dst src);
6739 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6740 ins_encode %{
6741 ShouldNotReachHere();
6742 %}
6743 ins_pipe( fpu_reg_reg );
6744 %}
6745
6746 // Load Double
6747 instruct MoveLEG2D(regD dst, legRegD src) %{
6748 match(Set dst src);
6749 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6750 ins_encode %{
6751 ShouldNotReachHere();
6752 %}
6753 ins_pipe( fpu_reg_reg );
6754 %}
6755
6756 //----------Load/Store/Move Instructions---------------------------------------
6757 //----------Load Instructions--------------------------------------------------
6758
6759 // Load Byte (8 bit signed)
6760 instruct loadB(rRegI dst, memory mem)
6761 %{
6762 match(Set dst (LoadB mem));
6763
6764 ins_cost(125);
6765 format %{ "movsbl $dst, $mem\t# byte" %}
6766
6767 ins_encode %{
6768 __ movsbl($dst$$Register, $mem$$Address);
6769 %}
6770
6771 ins_pipe(ialu_reg_mem);
6772 %}
6773
6774 // Load Byte (8 bit signed) into Long Register
6775 instruct loadB2L(rRegL dst, memory mem)
6776 %{
6777 match(Set dst (ConvI2L (LoadB mem)));
6778
6779 ins_cost(125);
6780 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6781
6782 ins_encode %{
6783 __ movsbq($dst$$Register, $mem$$Address);
6784 %}
6785
6786 ins_pipe(ialu_reg_mem);
6787 %}
6788
6789 // Load Unsigned Byte (8 bit UNsigned)
6790 instruct loadUB(rRegI dst, memory mem)
6791 %{
6792 match(Set dst (LoadUB mem));
6793
6794 ins_cost(125);
6795 format %{ "movzbl $dst, $mem\t# ubyte" %}
6796
6797 ins_encode %{
6798 __ movzbl($dst$$Register, $mem$$Address);
6799 %}
6800
6801 ins_pipe(ialu_reg_mem);
6802 %}
6803
6804 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6805 instruct loadUB2L(rRegL dst, memory mem)
6806 %{
6807 match(Set dst (ConvI2L (LoadUB mem)));
6808
6809 ins_cost(125);
6810 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6811
6812 ins_encode %{
6813 __ movzbq($dst$$Register, $mem$$Address);
6814 %}
6815
6816 ins_pipe(ialu_reg_mem);
6817 %}
6818
6819 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6820 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6821 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6822 effect(KILL cr);
6823
6824 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6825 "andl $dst, right_n_bits($mask, 8)" %}
6826 ins_encode %{
6827 Register Rdst = $dst$$Register;
6828 __ movzbq(Rdst, $mem$$Address);
6829 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6830 %}
6831 ins_pipe(ialu_reg_mem);
6832 %}
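
// A worked example of the masking above (assuming the usual
// right_n_bits(n) == (1 << n) - 1 definition, i.e. right_n_bits(8) == 0xFF):
// for $mask == 0x1FF the emitted andl immediate is 0x1FF & 0xFF == 0xFF,
// so only the low byte of the zero-extended load survives, matching the
// semantics of the ideal AndI node.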
6833
6834 // Load Short (16 bit signed)
6835 instruct loadS(rRegI dst, memory mem)
6836 %{
6837 match(Set dst (LoadS mem));
6838
6839 ins_cost(125);
6840 format %{ "movswl $dst, $mem\t# short" %}
6841
6842 ins_encode %{
6843 __ movswl($dst$$Register, $mem$$Address);
6844 %}
6845
6846 ins_pipe(ialu_reg_mem);
6847 %}
6848
6849 // Load Short (16 bit signed) to Byte (8 bit signed)
6850 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6851 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6852
6853 ins_cost(125);
6854 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6855 ins_encode %{
6856 __ movsbl($dst$$Register, $mem$$Address);
6857 %}
6858 ins_pipe(ialu_reg_mem);
6859 %}
6860
6861 // Load Short (16 bit signed) into Long Register
6862 instruct loadS2L(rRegL dst, memory mem)
6863 %{
6864 match(Set dst (ConvI2L (LoadS mem)));
6865
6866 ins_cost(125);
6867 format %{ "movswq $dst, $mem\t# short -> long" %}
6868
6869 ins_encode %{
6870 __ movswq($dst$$Register, $mem$$Address);
6871 %}
6872
6873 ins_pipe(ialu_reg_mem);
6874 %}
6875
6876 // Load Unsigned Short/Char (16 bit UNsigned)
6877 instruct loadUS(rRegI dst, memory mem)
6878 %{
6879 match(Set dst (LoadUS mem));
6880
6881 ins_cost(125);
6882 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6883
6884 ins_encode %{
6885 __ movzwl($dst$$Register, $mem$$Address);
6886 %}
6887
6888 ins_pipe(ialu_reg_mem);
6889 %}
6890
6891 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6892 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6893 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6894
6895 ins_cost(125);
6896 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6897 ins_encode %{
6898 __ movsbl($dst$$Register, $mem$$Address);
6899 %}
6900 ins_pipe(ialu_reg_mem);
6901 %}
6902
6903 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6904 instruct loadUS2L(rRegL dst, memory mem)
6905 %{
6906 match(Set dst (ConvI2L (LoadUS mem)));
6907
6908 ins_cost(125);
6909 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6910
6911 ins_encode %{
6912 __ movzwq($dst$$Register, $mem$$Address);
6913 %}
6914
6915 ins_pipe(ialu_reg_mem);
6916 %}
6917
6918 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6919 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6920 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6921
6922 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6923 ins_encode %{
6924 __ movzbq($dst$$Register, $mem$$Address);
6925 %}
6926 ins_pipe(ialu_reg_mem);
6927 %}
6928
6929 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
6930 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6931 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6932 effect(KILL cr);
6933
6934 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
6935 "andl $dst, right_n_bits($mask, 16)" %}
6936 ins_encode %{
6937 Register Rdst = $dst$$Register;
6938 __ movzwq(Rdst, $mem$$Address);
6939 __ andl(Rdst, $mask$$constant & right_n_bits(16));
6940 %}
6941 ins_pipe(ialu_reg_mem);
6942 %}
6943
6944 // Load Integer
6945 instruct loadI(rRegI dst, memory mem)
6946 %{
6947 match(Set dst (LoadI mem));
6948
6949 ins_cost(125);
6950 format %{ "movl $dst, $mem\t# int" %}
6951
6952 ins_encode %{
6953 __ movl($dst$$Register, $mem$$Address);
6954 %}
6955
6956 ins_pipe(ialu_reg_mem);
6957 %}
6958
6959 // Load Integer (32 bit signed) to Byte (8 bit signed)
6960 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6961 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6962
6963 ins_cost(125);
6964 format %{ "movsbl $dst, $mem\t# int -> byte" %}
6965 ins_encode %{
6966 __ movsbl($dst$$Register, $mem$$Address);
6967 %}
6968 ins_pipe(ialu_reg_mem);
6969 %}
6970
6971 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6972 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
6973 match(Set dst (AndI (LoadI mem) mask));
6974
6975 ins_cost(125);
6976 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
6977 ins_encode %{
6978 __ movzbl($dst$$Register, $mem$$Address);
6979 %}
6980 ins_pipe(ialu_reg_mem);
6981 %}
6982
6983 // Load Integer (32 bit signed) to Short (16 bit signed)
6984 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
6985 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6986
6987 ins_cost(125);
6988 format %{ "movswl $dst, $mem\t# int -> short" %}
6989 ins_encode %{
6990 __ movswl($dst$$Register, $mem$$Address);
6991 %}
6992 ins_pipe(ialu_reg_mem);
6993 %}
6994
6995 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6996 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
6997 match(Set dst (AndI (LoadI mem) mask));
6998
6999 ins_cost(125);
7000 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7001 ins_encode %{
7002 __ movzwl($dst$$Register, $mem$$Address);
7003 %}
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Integer into Long Register
7008 instruct loadI2L(rRegL dst, memory mem)
7009 %{
7010 match(Set dst (ConvI2L (LoadI mem)));
7011
7012 ins_cost(125);
7013 format %{ "movslq $dst, $mem\t# int -> long" %}
7014
7015 ins_encode %{
7016 __ movslq($dst$$Register, $mem$$Address);
7017 %}
7018
7019 ins_pipe(ialu_reg_mem);
7020 %}
7021
7022 // Load Integer with mask 0xFF into Long Register
7023 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7024 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7025
7026 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7027 ins_encode %{
7028 __ movzbq($dst$$Register, $mem$$Address);
7029 %}
7030 ins_pipe(ialu_reg_mem);
7031 %}
7032
7033 // Load Integer with mask 0xFFFF into Long Register
7034 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7035 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7036
7037 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041 ins_pipe(ialu_reg_mem);
7042 %}
7043
7044 // Load Integer with a 31-bit mask into Long Register
7045 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7046 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7047 effect(KILL cr);
7048
7049 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7050 "andl $dst, $mask" %}
7051 ins_encode %{
7052 Register Rdst = $dst$$Register;
7053 __ movl(Rdst, $mem$$Address);
7054 __ andl(Rdst, $mask$$constant);
7055 %}
7056 ins_pipe(ialu_reg_mem);
7057 %}
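
// For reference, Java expressions of the shapes below are likely to
// produce the masked ConvI2L subtrees matched above (an illustrative
// sketch, not an exhaustive list; 'v' stands for an int loaded from memory):
//
//   long x = v & 0xFF;        // ConvI2L(AndI(LoadI, 0xFF))   -> movzbq
//   long y = v & 0xFFFF;      // ConvI2L(AndI(LoadI, 0xFFFF)) -> movzwq
//   long z = v & 0x7FFFFFFF;  // 31-bit mask                  -> movl; andl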
7058
7059 // Load Unsigned Integer into Long Register
7060 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7061 %{
7062 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7063
7064 ins_cost(125);
7065 format %{ "movl $dst, $mem\t# uint -> long" %}
7066
7067 ins_encode %{
7068 __ movl($dst$$Register, $mem$$Address);
7069 %}
7070
7071 ins_pipe(ialu_reg_mem);
7072 %}
7073
7074 // Load Long
7075 instruct loadL(rRegL dst, memory mem)
7076 %{
7077 match(Set dst (LoadL mem));
7078
7079 ins_cost(125);
7080 format %{ "movq $dst, $mem\t# long" %}
7081
7082 ins_encode %{
7083 __ movq($dst$$Register, $mem$$Address);
7084 %}
7085
7086 ins_pipe(ialu_reg_mem); // XXX
7087 %}
7088
7089 // Load Range
7090 instruct loadRange(rRegI dst, memory mem)
7091 %{
7092 match(Set dst (LoadRange mem));
7093
7094 ins_cost(125); // XXX
7095 format %{ "movl $dst, $mem\t# range" %}
7096 ins_encode %{
7097 __ movl($dst$$Register, $mem$$Address);
7098 %}
7099 ins_pipe(ialu_reg_mem);
7100 %}
7101
7102 // Load Pointer
7103 instruct loadP(rRegP dst, memory mem)
7104 %{
7105 match(Set dst (LoadP mem));
7106 predicate(n->as_Load()->barrier_data() == 0);
7107
7108 ins_cost(125); // XXX
7109 format %{ "movq $dst, $mem\t# ptr" %}
7110 ins_encode %{
7111 __ movq($dst$$Register, $mem$$Address);
7112 %}
7113 ins_pipe(ialu_reg_mem); // XXX
7114 %}
7115
7116 // Load Compressed Pointer
7117 instruct loadN(rRegN dst, memory mem)
7118 %{
7119 predicate(n->as_Load()->barrier_data() == 0);
7120 match(Set dst (LoadN mem));
7121
7122 ins_cost(125); // XXX
7123 format %{ "movl $dst, $mem\t# compressed ptr" %}
7124 ins_encode %{
7125 __ movl($dst$$Register, $mem$$Address);
7126 %}
7127 ins_pipe(ialu_reg_mem); // XXX
7128 %}
7129
7130
7131 // Load Klass Pointer
7132 instruct loadKlass(rRegP dst, memory mem)
7133 %{
7134 match(Set dst (LoadKlass mem));
7135
7136 ins_cost(125); // XXX
7137 format %{ "movq $dst, $mem\t# class" %}
7138 ins_encode %{
7139 __ movq($dst$$Register, $mem$$Address);
7140 %}
7141 ins_pipe(ialu_reg_mem); // XXX
7142 %}
7143
7144 // Load narrow Klass Pointer
7145 instruct loadNKlass(rRegN dst, memory mem)
7146 %{
7147 predicate(!UseCompactObjectHeaders);
7148 match(Set dst (LoadNKlass mem));
7149
7150 ins_cost(125); // XXX
7151 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7152 ins_encode %{
7153 __ movl($dst$$Register, $mem$$Address);
7154 %}
7155 ins_pipe(ialu_reg_mem); // XXX
7156 %}
7157
7158 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7159 %{
7160 predicate(UseCompactObjectHeaders);
7161 match(Set dst (LoadNKlass mem));
7162 effect(KILL cr);
7163 ins_cost(125);
7164 format %{
7165 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7166 "shrl $dst, markWord::klass_shift"
7167 %}
7168 ins_encode %{
    // The incoming address points at obj-start + Type::klass_offset(). We need to recover
    // obj-start so that we can load from the object's mark-word instead.
7171 Register d = $dst$$Register;
7172 Address s = ($mem$$Address).plus_disp(-Type::klass_offset());
7173 if (UseAPX) {
7174 __ eshrl(d, s, markWord::klass_shift, false);
7175 } else {
7176 __ movl(d, s);
7177 __ shrl(d, markWord::klass_shift);
7178 }
7179 %}
7180 ins_pipe(ialu_reg_mem);
7181 %}
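
// A worked example of the displacement math above (assuming, as in
// HotSpot, that the mark word sits at offset 0 of the object): with
// $mem == [obj + Type::klass_offset()], plus_disp(-Type::klass_offset())
// yields [obj + 0], i.e. the mark word, and the right shift by
// markWord::klass_shift leaves the narrow klass pointer in $dst.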
7182
7183 // Load Float
7184 instruct loadF(regF dst, memory mem)
7185 %{
7186 match(Set dst (LoadF mem));
7187
7188 ins_cost(145); // XXX
7189 format %{ "movss $dst, $mem\t# float" %}
7190 ins_encode %{
7191 __ movflt($dst$$XMMRegister, $mem$$Address);
7192 %}
7193 ins_pipe(pipe_slow); // XXX
7194 %}
7195
7196 // Load Double
7197 instruct loadD_partial(regD dst, memory mem)
7198 %{
7199 predicate(!UseXmmLoadAndClearUpper);
7200 match(Set dst (LoadD mem));
7201
7202 ins_cost(145); // XXX
7203 format %{ "movlpd $dst, $mem\t# double" %}
7204 ins_encode %{
7205 __ movdbl($dst$$XMMRegister, $mem$$Address);
7206 %}
7207 ins_pipe(pipe_slow); // XXX
7208 %}
7209
7210 instruct loadD(regD dst, memory mem)
7211 %{
7212 predicate(UseXmmLoadAndClearUpper);
7213 match(Set dst (LoadD mem));
7214
7215 ins_cost(145); // XXX
7216 format %{ "movsd $dst, $mem\t# double" %}
7217 ins_encode %{
7218 __ movdbl($dst$$XMMRegister, $mem$$Address);
7219 %}
7220 ins_pipe(pipe_slow); // XXX
7221 %}
7222
7223 // max = java.lang.Math.max(float a, float b)
7224 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7225 predicate(VM_Version::supports_avx10_2());
7226 match(Set dst (MaxF a b));
7227 format %{ "maxF $dst, $a, $b" %}
7228 ins_encode %{
7229 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7230 %}
7231 ins_pipe( pipe_slow );
7232 %}
7233
7234 // max = java.lang.Math.max(float a, float b)
7235 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7236 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7237 match(Set dst (MaxF a b));
7238 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7239 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7240 ins_encode %{
7241 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7242 %}
7243 ins_pipe( pipe_slow );
7244 %}
7245
7246 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7247 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7248 match(Set dst (MaxF a b));
7249 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7250
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7252 ins_encode %{
7253 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7254 false /*min*/, true /*single*/);
7255 %}
7256 ins_pipe( pipe_slow );
7257 %}
7258
7259 // max = java.lang.Math.max(double a, double b)
7260 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7261 predicate(VM_Version::supports_avx10_2());
7262 match(Set dst (MaxD a b));
7263 format %{ "maxD $dst, $a, $b" %}
7264 ins_encode %{
7265 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7266 %}
7267 ins_pipe( pipe_slow );
7268 %}
7269
7270 // max = java.lang.Math.max(double a, double b)
7271 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7272 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7273 match(Set dst (MaxD a b));
7274 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7275 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7276 ins_encode %{
7277 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7278 %}
7279 ins_pipe( pipe_slow );
7280 %}
7281
7282 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7283 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7284 match(Set dst (MaxD a b));
7285 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7286
7287 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7288 ins_encode %{
7289 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7290 false /*min*/, false /*single*/);
7291 %}
7292 ins_pipe( pipe_slow );
7293 %}
7294
// min = java.lang.Math.min(float a, float b)
7296 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7297 predicate(VM_Version::supports_avx10_2());
7298 match(Set dst (MinF a b));
7299 format %{ "minF $dst, $a, $b" %}
7300 ins_encode %{
7301 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7302 %}
7303 ins_pipe( pipe_slow );
7304 %}
7305
7306 // min = java.lang.Math.min(float a, float b)
7307 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7308 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7309 match(Set dst (MinF a b));
7310 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7311 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7312 ins_encode %{
7313 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7314 %}
7315 ins_pipe( pipe_slow );
7316 %}
7317
7318 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7319 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7320 match(Set dst (MinF a b));
7321 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7322
7323 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7324 ins_encode %{
7325 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7326 true /*min*/, true /*single*/);
7327 %}
7328 ins_pipe( pipe_slow );
7329 %}
7330
// min = java.lang.Math.min(double a, double b)
7332 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7333 predicate(VM_Version::supports_avx10_2());
7334 match(Set dst (MinD a b));
7335 format %{ "minD $dst, $a, $b" %}
7336 ins_encode %{
7337 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7338 %}
7339 ins_pipe( pipe_slow );
7340 %}
7341
7342 // min = java.lang.Math.min(double a, double b)
7343 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7344 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7345 match(Set dst (MinD a b));
7346 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7347 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7348 ins_encode %{
7349 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7350 %}
7351 ins_pipe( pipe_slow );
7352 %}
7353
7354 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7355 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7356 match(Set dst (MinD a b));
7357 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7358
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7360 ins_encode %{
7361 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7362 true /*min*/, false /*single*/);
7363 %}
7364 ins_pipe( pipe_slow );
7365 %}
7366
7367 // Load Effective Address
7368 instruct leaP8(rRegP dst, indOffset8 mem)
7369 %{
7370 match(Set dst mem);
7371
7372 ins_cost(110); // XXX
7373 format %{ "leaq $dst, $mem\t# ptr 8" %}
7374 ins_encode %{
7375 __ leaq($dst$$Register, $mem$$Address);
7376 %}
7377 ins_pipe(ialu_reg_reg_fat);
7378 %}
7379
7380 instruct leaP32(rRegP dst, indOffset32 mem)
7381 %{
7382 match(Set dst mem);
7383
7384 ins_cost(110);
7385 format %{ "leaq $dst, $mem\t# ptr 32" %}
7386 ins_encode %{
7387 __ leaq($dst$$Register, $mem$$Address);
7388 %}
7389 ins_pipe(ialu_reg_reg_fat);
7390 %}
7391
7392 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7393 %{
7394 match(Set dst mem);
7395
7396 ins_cost(110);
7397 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7398 ins_encode %{
7399 __ leaq($dst$$Register, $mem$$Address);
7400 %}
7401 ins_pipe(ialu_reg_reg_fat);
7402 %}
7403
7404 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7405 %{
7406 match(Set dst mem);
7407
7408 ins_cost(110);
7409 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7410 ins_encode %{
7411 __ leaq($dst$$Register, $mem$$Address);
7412 %}
7413 ins_pipe(ialu_reg_reg_fat);
7414 %}
7415
7416 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7417 %{
7418 match(Set dst mem);
7419
7420 ins_cost(110);
7421 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7422 ins_encode %{
7423 __ leaq($dst$$Register, $mem$$Address);
7424 %}
7425 ins_pipe(ialu_reg_reg_fat);
7426 %}
7427
7428 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7429 %{
7430 match(Set dst mem);
7431
7432 ins_cost(110);
7433 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7434 ins_encode %{
7435 __ leaq($dst$$Register, $mem$$Address);
7436 %}
7437 ins_pipe(ialu_reg_reg_fat);
7438 %}
7439
7440 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7441 %{
7442 match(Set dst mem);
7443
7444 ins_cost(110);
7445 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7446 ins_encode %{
7447 __ leaq($dst$$Register, $mem$$Address);
7448 %}
7449 ins_pipe(ialu_reg_reg_fat);
7450 %}
7451
7452 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7453 %{
7454 match(Set dst mem);
7455
7456 ins_cost(110);
7457 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7458 ins_encode %{
7459 __ leaq($dst$$Register, $mem$$Address);
7460 %}
7461 ins_pipe(ialu_reg_reg_fat);
7462 %}
7463
// Load Effective Address which uses a narrow (32-bit) oop
7465 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7466 %{
7467 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7468 match(Set dst mem);
7469
7470 ins_cost(110);
7471 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7472 ins_encode %{
7473 __ leaq($dst$$Register, $mem$$Address);
7474 %}
7475 ins_pipe(ialu_reg_reg_fat);
7476 %}
7477
7478 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7479 %{
7480 predicate(CompressedOops::shift() == 0);
7481 match(Set dst mem);
7482
7483 ins_cost(110); // XXX
7484 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7485 ins_encode %{
7486 __ leaq($dst$$Register, $mem$$Address);
7487 %}
7488 ins_pipe(ialu_reg_reg_fat);
7489 %}
7490
7491 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7492 %{
7493 predicate(CompressedOops::shift() == 0);
7494 match(Set dst mem);
7495
7496 ins_cost(110);
7497 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7498 ins_encode %{
7499 __ leaq($dst$$Register, $mem$$Address);
7500 %}
7501 ins_pipe(ialu_reg_reg_fat);
7502 %}
7503
7504 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7505 %{
7506 predicate(CompressedOops::shift() == 0);
7507 match(Set dst mem);
7508
7509 ins_cost(110);
7510 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7511 ins_encode %{
7512 __ leaq($dst$$Register, $mem$$Address);
7513 %}
7514 ins_pipe(ialu_reg_reg_fat);
7515 %}
7516
7517 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7518 %{
7519 predicate(CompressedOops::shift() == 0);
7520 match(Set dst mem);
7521
7522 ins_cost(110);
7523 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7524 ins_encode %{
7525 __ leaq($dst$$Register, $mem$$Address);
7526 %}
7527 ins_pipe(ialu_reg_reg_fat);
7528 %}
7529
7530 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7531 %{
7532 predicate(CompressedOops::shift() == 0);
7533 match(Set dst mem);
7534
7535 ins_cost(110);
7536 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7537 ins_encode %{
7538 __ leaq($dst$$Register, $mem$$Address);
7539 %}
7540 ins_pipe(ialu_reg_reg_fat);
7541 %}
7542
7543 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7544 %{
7545 predicate(CompressedOops::shift() == 0);
7546 match(Set dst mem);
7547
7548 ins_cost(110);
7549 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7550 ins_encode %{
7551 __ leaq($dst$$Register, $mem$$Address);
7552 %}
7553 ins_pipe(ialu_reg_reg_fat);
7554 %}
7555
7556 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7557 %{
7558 predicate(CompressedOops::shift() == 0);
7559 match(Set dst mem);
7560
7561 ins_cost(110);
7562 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7563 ins_encode %{
7564 __ leaq($dst$$Register, $mem$$Address);
7565 %}
7566 ins_pipe(ialu_reg_reg_fat);
7567 %}
7568
7569 instruct loadConI(rRegI dst, immI src)
7570 %{
7571 match(Set dst src);
7572
7573 format %{ "movl $dst, $src\t# int" %}
7574 ins_encode %{
7575 __ movl($dst$$Register, $src$$constant);
7576 %}
7577 ins_pipe(ialu_reg_fat); // XXX
7578 %}
7579
7580 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7581 %{
7582 match(Set dst src);
7583 effect(KILL cr);
7584
7585 ins_cost(50);
7586 format %{ "xorl $dst, $dst\t# int" %}
7587 ins_encode %{
7588 __ xorl($dst$$Register, $dst$$Register);
7589 %}
7590 ins_pipe(ialu_reg);
7591 %}
7592
7593 instruct loadConL(rRegL dst, immL src)
7594 %{
7595 match(Set dst src);
7596
7597 ins_cost(150);
7598 format %{ "movq $dst, $src\t# long" %}
7599 ins_encode %{
7600 __ mov64($dst$$Register, $src$$constant);
7601 %}
7602 ins_pipe(ialu_reg);
7603 %}
7604
7605 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7606 %{
7607 match(Set dst src);
7608 effect(KILL cr);
7609
7610 ins_cost(50);
7611 format %{ "xorl $dst, $dst\t# long" %}
7612 ins_encode %{
7613 __ xorl($dst$$Register, $dst$$Register);
7614 %}
7615 ins_pipe(ialu_reg); // XXX
7616 %}
7617
7618 instruct loadConUL32(rRegL dst, immUL32 src)
7619 %{
7620 match(Set dst src);
7621
7622 ins_cost(60);
7623 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7624 ins_encode %{
7625 __ movl($dst$$Register, $src$$constant);
7626 %}
7627 ins_pipe(ialu_reg);
7628 %}
7629
7630 instruct loadConL32(rRegL dst, immL32 src)
7631 %{
7632 match(Set dst src);
7633
7634 ins_cost(70);
7635 format %{ "movq $dst, $src\t# long (32-bit)" %}
7636 ins_encode %{
7637 __ movq($dst$$Register, $src$$constant);
7638 %}
7639 ins_pipe(ialu_reg);
7640 %}
7641
7642 instruct loadConP(rRegP dst, immP con) %{
7643 match(Set dst con);
7644
7645 format %{ "movq $dst, $con\t# ptr" %}
7646 ins_encode %{
7647 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7648 %}
7649 ins_pipe(ialu_reg_fat); // XXX
7650 %}
7651
7652 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7653 %{
7654 match(Set dst src);
7655 effect(KILL cr);
7656
7657 ins_cost(50);
7658 format %{ "xorl $dst, $dst\t# ptr" %}
7659 ins_encode %{
7660 __ xorl($dst$$Register, $dst$$Register);
7661 %}
7662 ins_pipe(ialu_reg);
7663 %}
7664
7665 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7666 %{
7667 match(Set dst src);
7668 effect(KILL cr);
7669
7670 ins_cost(60);
7671 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7672 ins_encode %{
7673 __ movl($dst$$Register, $src$$constant);
7674 %}
7675 ins_pipe(ialu_reg);
7676 %}
7677
7678 instruct loadConF(regF dst, immF con) %{
7679 match(Set dst con);
7680 ins_cost(125);
7681 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7682 ins_encode %{
7683 __ movflt($dst$$XMMRegister, $constantaddress($con));
7684 %}
7685 ins_pipe(pipe_slow);
7686 %}
7687
7688 instruct loadConH(regF dst, immH con) %{
7689 match(Set dst con);
7690 ins_cost(125);
7691 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7692 ins_encode %{
7693 __ movflt($dst$$XMMRegister, $constantaddress($con));
7694 %}
7695 ins_pipe(pipe_slow);
7696 %}
7697
7698 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7699 match(Set dst src);
7700 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7702 ins_encode %{
7703 __ xorq($dst$$Register, $dst$$Register);
7704 %}
7705 ins_pipe(ialu_reg);
7706 %}
7707
7708 instruct loadConN(rRegN dst, immN src) %{
7709 match(Set dst src);
7710
7711 ins_cost(125);
7712 format %{ "movl $dst, $src\t# compressed ptr" %}
7713 ins_encode %{
7714 address con = (address)$src$$constant;
7715 if (con == nullptr) {
7716 ShouldNotReachHere();
7717 } else {
7718 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7719 }
7720 %}
7721 ins_pipe(ialu_reg_fat); // XXX
7722 %}
7723
7724 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7725 match(Set dst src);
7726
7727 ins_cost(125);
7728 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7729 ins_encode %{
7730 address con = (address)$src$$constant;
7731 if (con == nullptr) {
7732 ShouldNotReachHere();
7733 } else {
7734 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7735 }
7736 %}
7737 ins_pipe(ialu_reg_fat); // XXX
7738 %}
7739
7740 instruct loadConF0(regF dst, immF0 src)
7741 %{
7742 match(Set dst src);
7743 ins_cost(100);
7744
7745 format %{ "xorps $dst, $dst\t# float 0.0" %}
7746 ins_encode %{
7747 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7748 %}
7749 ins_pipe(pipe_slow);
7750 %}
7751
// Use the same format since predicate() cannot be used here.
7753 instruct loadConD(regD dst, immD con) %{
7754 match(Set dst con);
7755 ins_cost(125);
7756 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7757 ins_encode %{
7758 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7759 %}
7760 ins_pipe(pipe_slow);
7761 %}
7762
7763 instruct loadConD0(regD dst, immD0 src)
7764 %{
7765 match(Set dst src);
7766 ins_cost(100);
7767
7768 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7769 ins_encode %{
7770 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7771 %}
7772 ins_pipe(pipe_slow);
7773 %}
7774
7775 instruct loadSSI(rRegI dst, stackSlotI src)
7776 %{
7777 match(Set dst src);
7778
7779 ins_cost(125);
7780 format %{ "movl $dst, $src\t# int stk" %}
7781 ins_encode %{
7782 __ movl($dst$$Register, $src$$Address);
7783 %}
7784 ins_pipe(ialu_reg_mem);
7785 %}
7786
7787 instruct loadSSL(rRegL dst, stackSlotL src)
7788 %{
7789 match(Set dst src);
7790
7791 ins_cost(125);
7792 format %{ "movq $dst, $src\t# long stk" %}
7793 ins_encode %{
7794 __ movq($dst$$Register, $src$$Address);
7795 %}
7796 ins_pipe(ialu_reg_mem);
7797 %}
7798
7799 instruct loadSSP(rRegP dst, stackSlotP src)
7800 %{
7801 match(Set dst src);
7802
7803 ins_cost(125);
7804 format %{ "movq $dst, $src\t# ptr stk" %}
7805 ins_encode %{
7806 __ movq($dst$$Register, $src$$Address);
7807 %}
7808 ins_pipe(ialu_reg_mem);
7809 %}
7810
7811 instruct loadSSF(regF dst, stackSlotF src)
7812 %{
7813 match(Set dst src);
7814
7815 ins_cost(125);
7816 format %{ "movss $dst, $src\t# float stk" %}
7817 ins_encode %{
7818 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7819 %}
7820 ins_pipe(pipe_slow); // XXX
7821 %}
7822
// Use the same format since predicate() cannot be used here.
7824 instruct loadSSD(regD dst, stackSlotD src)
7825 %{
7826 match(Set dst src);
7827
7828 ins_cost(125);
7829 format %{ "movsd $dst, $src\t# double stk" %}
7830 ins_encode %{
7831 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7832 %}
7833 ins_pipe(pipe_slow); // XXX
7834 %}
7835
7836 // Prefetch instructions for allocation.
7837 // Must be safe to execute with invalid address (cannot fault).
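//
// For orientation, the AllocatePrefetchInstr flag selects among the
// variants below:
//   0 -> PREFETCHNTA, 1 -> PREFETCHT0, 2 -> PREFETCHT2, 3 -> PREFETCHW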
7838
7839 instruct prefetchAlloc( memory mem ) %{
7840 predicate(AllocatePrefetchInstr==3);
7841 match(PrefetchAllocation mem);
7842 ins_cost(125);
7843
7844 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7845 ins_encode %{
7846 __ prefetchw($mem$$Address);
7847 %}
7848 ins_pipe(ialu_mem);
7849 %}
7850
7851 instruct prefetchAllocNTA( memory mem ) %{
7852 predicate(AllocatePrefetchInstr==0);
7853 match(PrefetchAllocation mem);
7854 ins_cost(125);
7855
7856 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7857 ins_encode %{
7858 __ prefetchnta($mem$$Address);
7859 %}
7860 ins_pipe(ialu_mem);
7861 %}
7862
7863 instruct prefetchAllocT0( memory mem ) %{
7864 predicate(AllocatePrefetchInstr==1);
7865 match(PrefetchAllocation mem);
7866 ins_cost(125);
7867
7868 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7869 ins_encode %{
7870 __ prefetcht0($mem$$Address);
7871 %}
7872 ins_pipe(ialu_mem);
7873 %}
7874
7875 instruct prefetchAllocT2( memory mem ) %{
7876 predicate(AllocatePrefetchInstr==2);
7877 match(PrefetchAllocation mem);
7878 ins_cost(125);
7879
7880 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7881 ins_encode %{
7882 __ prefetcht2($mem$$Address);
7883 %}
7884 ins_pipe(ialu_mem);
7885 %}
7886
7887 //----------Store Instructions-------------------------------------------------
7888
7889 // Store Byte
7890 instruct storeB(memory mem, rRegI src)
7891 %{
7892 match(Set mem (StoreB mem src));
7893
7894 ins_cost(125); // XXX
7895 format %{ "movb $mem, $src\t# byte" %}
7896 ins_encode %{
7897 __ movb($mem$$Address, $src$$Register);
7898 %}
7899 ins_pipe(ialu_mem_reg);
7900 %}
7901
7902 // Store Char/Short
7903 instruct storeC(memory mem, rRegI src)
7904 %{
7905 match(Set mem (StoreC mem src));
7906
7907 ins_cost(125); // XXX
7908 format %{ "movw $mem, $src\t# char/short" %}
7909 ins_encode %{
7910 __ movw($mem$$Address, $src$$Register);
7911 %}
7912 ins_pipe(ialu_mem_reg);
7913 %}
7914
7915 // Store Integer
7916 instruct storeI(memory mem, rRegI src)
7917 %{
7918 match(Set mem (StoreI mem src));
7919
7920 ins_cost(125); // XXX
7921 format %{ "movl $mem, $src\t# int" %}
7922 ins_encode %{
7923 __ movl($mem$$Address, $src$$Register);
7924 %}
7925 ins_pipe(ialu_mem_reg);
7926 %}
7927
7928 // Store Long
7929 instruct storeL(memory mem, rRegL src)
7930 %{
7931 match(Set mem (StoreL mem src));
7932
7933 ins_cost(125); // XXX
7934 format %{ "movq $mem, $src\t# long" %}
7935 ins_encode %{
7936 __ movq($mem$$Address, $src$$Register);
7937 %}
7938 ins_pipe(ialu_mem_reg); // XXX
7939 %}
7940
7941 // Store Pointer
7942 instruct storeP(memory mem, any_RegP src)
7943 %{
7944 predicate(n->as_Store()->barrier_data() == 0);
7945 match(Set mem (StoreP mem src));
7946
7947 ins_cost(125); // XXX
7948 format %{ "movq $mem, $src\t# ptr" %}
7949 ins_encode %{
7950 __ movq($mem$$Address, $src$$Register);
7951 %}
7952 ins_pipe(ialu_mem_reg);
7953 %}
7954
7955 instruct storeImmP0(memory mem, immP0 zero)
7956 %{
7957 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
7958 match(Set mem (StoreP mem zero));
7959
7960 ins_cost(125); // XXX
7961 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
7962 ins_encode %{
7963 __ movq($mem$$Address, r12);
7964 %}
7965 ins_pipe(ialu_mem_reg);
7966 %}
7967
7968 // Store Null Pointer, mark word, or other simple pointer constant.
7969 instruct storeImmP(memory mem, immP31 src)
7970 %{
7971 predicate(n->as_Store()->barrier_data() == 0);
7972 match(Set mem (StoreP mem src));
7973
7974 ins_cost(150); // XXX
7975 format %{ "movq $mem, $src\t# ptr" %}
7976 ins_encode %{
7977 __ movq($mem$$Address, $src$$constant);
7978 %}
7979 ins_pipe(ialu_mem_imm);
7980 %}
7981
7982 // Store Compressed Pointer
7983 instruct storeN(memory mem, rRegN src)
7984 %{
7985 predicate(n->as_Store()->barrier_data() == 0);
7986 match(Set mem (StoreN mem src));
7987
7988 ins_cost(125); // XXX
7989 format %{ "movl $mem, $src\t# compressed ptr" %}
7990 ins_encode %{
7991 __ movl($mem$$Address, $src$$Register);
7992 %}
7993 ins_pipe(ialu_mem_reg);
7994 %}
7995
7996 instruct storeNKlass(memory mem, rRegN src)
7997 %{
7998 match(Set mem (StoreNKlass mem src));
7999
8000 ins_cost(125); // XXX
8001 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8002 ins_encode %{
8003 __ movl($mem$$Address, $src$$Register);
8004 %}
8005 ins_pipe(ialu_mem_reg);
8006 %}
8007
8008 instruct storeImmN0(memory mem, immN0 zero)
8009 %{
8010 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8011 match(Set mem (StoreN mem zero));
8012
8013 ins_cost(125); // XXX
8014 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8015 ins_encode %{
8016 __ movl($mem$$Address, r12);
8017 %}
8018 ins_pipe(ialu_mem_reg);
8019 %}
8020
8021 instruct storeImmN(memory mem, immN src)
8022 %{
8023 predicate(n->as_Store()->barrier_data() == 0);
8024 match(Set mem (StoreN mem src));
8025
8026 ins_cost(150); // XXX
8027 format %{ "movl $mem, $src\t# compressed ptr" %}
8028 ins_encode %{
8029 address con = (address)$src$$constant;
8030 if (con == nullptr) {
8031 __ movl($mem$$Address, 0);
8032 } else {
8033 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8034 }
8035 %}
8036 ins_pipe(ialu_mem_imm);
8037 %}
8038
8039 instruct storeImmNKlass(memory mem, immNKlass src)
8040 %{
8041 match(Set mem (StoreNKlass mem src));
8042
8043 ins_cost(150); // XXX
8044 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8045 ins_encode %{
8046 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8047 %}
8048 ins_pipe(ialu_mem_imm);
8049 %}
8050
8051 // Store Integer Immediate
8052 instruct storeImmI0(memory mem, immI_0 zero)
8053 %{
8054 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8055 match(Set mem (StoreI mem zero));
8056
8057 ins_cost(125); // XXX
8058 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8059 ins_encode %{
8060 __ movl($mem$$Address, r12);
8061 %}
8062 ins_pipe(ialu_mem_reg);
8063 %}
8064
8065 instruct storeImmI(memory mem, immI src)
8066 %{
8067 match(Set mem (StoreI mem src));
8068
8069 ins_cost(150);
8070 format %{ "movl $mem, $src\t# int" %}
8071 ins_encode %{
8072 __ movl($mem$$Address, $src$$constant);
8073 %}
8074 ins_pipe(ialu_mem_imm);
8075 %}
8076
8077 // Store Long Immediate
8078 instruct storeImmL0(memory mem, immL0 zero)
8079 %{
8080 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8081 match(Set mem (StoreL mem zero));
8082
8083 ins_cost(125); // XXX
8084 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8085 ins_encode %{
8086 __ movq($mem$$Address, r12);
8087 %}
8088 ins_pipe(ialu_mem_reg);
8089 %}
8090
8091 instruct storeImmL(memory mem, immL32 src)
8092 %{
8093 match(Set mem (StoreL mem src));
8094
8095 ins_cost(150);
8096 format %{ "movq $mem, $src\t# long" %}
8097 ins_encode %{
8098 __ movq($mem$$Address, $src$$constant);
8099 %}
8100 ins_pipe(ialu_mem_imm);
8101 %}
8102
8103 // Store Short/Char Immediate
8104 instruct storeImmC0(memory mem, immI_0 zero)
8105 %{
8106 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8107 match(Set mem (StoreC mem zero));
8108
8109 ins_cost(125); // XXX
8110 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8111 ins_encode %{
8112 __ movw($mem$$Address, r12);
8113 %}
8114 ins_pipe(ialu_mem_reg);
8115 %}
8116
8117 instruct storeImmI16(memory mem, immI16 src)
8118 %{
8119 predicate(UseStoreImmI16);
8120 match(Set mem (StoreC mem src));
8121
8122 ins_cost(150);
8123 format %{ "movw $mem, $src\t# short/char" %}
8124 ins_encode %{
8125 __ movw($mem$$Address, $src$$constant);
8126 %}
8127 ins_pipe(ialu_mem_imm);
8128 %}
8129
8130 // Store Byte Immediate
8131 instruct storeImmB0(memory mem, immI_0 zero)
8132 %{
8133 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8134 match(Set mem (StoreB mem zero));
8135
8136 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8138 ins_encode %{
8139 __ movb($mem$$Address, r12);
8140 %}
8141 ins_pipe(ialu_mem_reg);
8142 %}
8143
8144 instruct storeImmB(memory mem, immI8 src)
8145 %{
8146 match(Set mem (StoreB mem src));
8147
8148 ins_cost(150); // XXX
8149 format %{ "movb $mem, $src\t# byte" %}
8150 ins_encode %{
8151 __ movb($mem$$Address, $src$$constant);
8152 %}
8153 ins_pipe(ialu_mem_imm);
8154 %}
8155
8156 // Store Float
8157 instruct storeF(memory mem, regF src)
8158 %{
8159 match(Set mem (StoreF mem src));
8160
8161 ins_cost(95); // XXX
8162 format %{ "movss $mem, $src\t# float" %}
8163 ins_encode %{
8164 __ movflt($mem$$Address, $src$$XMMRegister);
8165 %}
8166 ins_pipe(pipe_slow); // XXX
8167 %}
8168
// Store immediate float 0.0 (it is faster than a store from an XMM register)
8170 instruct storeF0(memory mem, immF0 zero)
8171 %{
8172 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8173 match(Set mem (StoreF mem zero));
8174
8175 ins_cost(25); // XXX
8176 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8177 ins_encode %{
8178 __ movl($mem$$Address, r12);
8179 %}
8180 ins_pipe(ialu_mem_reg);
8181 %}
8182
8183 instruct storeF_imm(memory mem, immF src)
8184 %{
8185 match(Set mem (StoreF mem src));
8186
8187 ins_cost(50);
8188 format %{ "movl $mem, $src\t# float" %}
8189 ins_encode %{
8190 __ movl($mem$$Address, jint_cast($src$$constant));
8191 %}
8192 ins_pipe(ialu_mem_imm);
8193 %}
8194
8195 // Store Double
8196 instruct storeD(memory mem, regD src)
8197 %{
8198 match(Set mem (StoreD mem src));
8199
8200 ins_cost(95); // XXX
8201 format %{ "movsd $mem, $src\t# double" %}
8202 ins_encode %{
8203 __ movdbl($mem$$Address, $src$$XMMRegister);
8204 %}
8205 ins_pipe(pipe_slow); // XXX
8206 %}
8207
// Store immediate double 0.0 (it is faster than a store from an XMM register)
8209 instruct storeD0_imm(memory mem, immD0 src)
8210 %{
8211 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8212 match(Set mem (StoreD mem src));
8213
8214 ins_cost(50);
8215 format %{ "movq $mem, $src\t# double 0." %}
8216 ins_encode %{
8217 __ movq($mem$$Address, $src$$constant);
8218 %}
8219 ins_pipe(ialu_mem_imm);
8220 %}
8221
8222 instruct storeD0(memory mem, immD0 zero)
8223 %{
8224 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8225 match(Set mem (StoreD mem zero));
8226
8227 ins_cost(25); // XXX
8228 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8229 ins_encode %{
8230 __ movq($mem$$Address, r12);
8231 %}
8232 ins_pipe(ialu_mem_reg);
8233 %}
8234
8235 instruct storeSSI(stackSlotI dst, rRegI src)
8236 %{
8237 match(Set dst src);
8238
8239 ins_cost(100);
8240 format %{ "movl $dst, $src\t# int stk" %}
8241 ins_encode %{
8242 __ movl($dst$$Address, $src$$Register);
8243 %}
8244 ins_pipe( ialu_mem_reg );
8245 %}
8246
8247 instruct storeSSL(stackSlotL dst, rRegL src)
8248 %{
8249 match(Set dst src);
8250
8251 ins_cost(100);
8252 format %{ "movq $dst, $src\t# long stk" %}
8253 ins_encode %{
8254 __ movq($dst$$Address, $src$$Register);
8255 %}
8256 ins_pipe(ialu_mem_reg);
8257 %}
8258
8259 instruct storeSSP(stackSlotP dst, rRegP src)
8260 %{
8261 match(Set dst src);
8262
8263 ins_cost(100);
8264 format %{ "movq $dst, $src\t# ptr stk" %}
8265 ins_encode %{
8266 __ movq($dst$$Address, $src$$Register);
8267 %}
8268 ins_pipe(ialu_mem_reg);
8269 %}
8270
8271 instruct storeSSF(stackSlotF dst, regF src)
8272 %{
8273 match(Set dst src);
8274
8275 ins_cost(95); // XXX
8276 format %{ "movss $dst, $src\t# float stk" %}
8277 ins_encode %{
8278 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8279 %}
8280 ins_pipe(pipe_slow); // XXX
8281 %}
8282
8283 instruct storeSSD(stackSlotD dst, regD src)
8284 %{
8285 match(Set dst src);
8286
8287 ins_cost(95); // XXX
8288 format %{ "movsd $dst, $src\t# double stk" %}
8289 ins_encode %{
8290 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8291 %}
8292 ins_pipe(pipe_slow); // XXX
8293 %}
8294
8295 instruct cacheWB(indirect addr)
8296 %{
8297 predicate(VM_Version::supports_data_cache_line_flush());
8298 match(CacheWB addr);
8299
8300 ins_cost(100);
  format %{ "cache wb $addr" %}
8302 ins_encode %{
8303 assert($addr->index_position() < 0, "should be");
8304 assert($addr$$disp == 0, "should be");
8305 __ cache_wb(Address($addr$$base$$Register, 0));
8306 %}
8307 ins_pipe(pipe_slow); // XXX
8308 %}
8309
8310 instruct cacheWBPreSync()
8311 %{
8312 predicate(VM_Version::supports_data_cache_line_flush());
8313 match(CacheWBPreSync);
8314
8315 ins_cost(100);
  format %{ "cache wb presync" %}
8317 ins_encode %{
8318 __ cache_wbsync(true);
8319 %}
8320 ins_pipe(pipe_slow); // XXX
8321 %}
8322
8323 instruct cacheWBPostSync()
8324 %{
8325 predicate(VM_Version::supports_data_cache_line_flush());
8326 match(CacheWBPostSync);
8327
8328 ins_cost(100);
  format %{ "cache wb postsync" %}
8330 ins_encode %{
8331 __ cache_wbsync(false);
8332 %}
8333 ins_pipe(pipe_slow); // XXX
8334 %}
8335
8336 //----------BSWAP Instructions-------------------------------------------------
8337 instruct bytes_reverse_int(rRegI dst) %{
8338 match(Set dst (ReverseBytesI dst));
8339
8340 format %{ "bswapl $dst" %}
8341 ins_encode %{
8342 __ bswapl($dst$$Register);
8343 %}
8344 ins_pipe( ialu_reg );
8345 %}
8346
8347 instruct bytes_reverse_long(rRegL dst) %{
8348 match(Set dst (ReverseBytesL dst));
8349
8350 format %{ "bswapq $dst" %}
8351 ins_encode %{
8352 __ bswapq($dst$$Register);
8353 %}
8354 ins_pipe( ialu_reg);
8355 %}
8356
8357 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8358 match(Set dst (ReverseBytesUS dst));
8359 effect(KILL cr);
8360
8361 format %{ "bswapl $dst\n\t"
8362 "shrl $dst,16\n\t" %}
8363 ins_encode %{
8364 __ bswapl($dst$$Register);
8365 __ shrl($dst$$Register, 16);
8366 %}
8367 ins_pipe( ialu_reg );
8368 %}
8369
8370 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8371 match(Set dst (ReverseBytesS dst));
8372 effect(KILL cr);
8373
  format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
8376 ins_encode %{
8377 __ bswapl($dst$$Register);
8378 __ sarl($dst$$Register, 16);
8379 %}
8380 ins_pipe( ialu_reg );
8381 %}
8382
8383 //---------- Zeros Count Instructions ------------------------------------------
8384
8385 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8386 predicate(UseCountLeadingZerosInstruction);
8387 match(Set dst (CountLeadingZerosI src));
8388 effect(KILL cr);
8389
8390 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8391 ins_encode %{
8392 __ lzcntl($dst$$Register, $src$$Register);
8393 %}
8394 ins_pipe(ialu_reg);
8395 %}
8396
8397 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8398 predicate(UseCountLeadingZerosInstruction);
8399 match(Set dst (CountLeadingZerosI (LoadI src)));
8400 effect(KILL cr);
8401 ins_cost(175);
8402 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8403 ins_encode %{
8404 __ lzcntl($dst$$Register, $src$$Address);
8405 %}
8406 ins_pipe(ialu_reg_mem);
8407 %}
8408
8409 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8410 predicate(!UseCountLeadingZerosInstruction);
8411 match(Set dst (CountLeadingZerosI src));
8412 effect(KILL cr);
8413
8414 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8415 "jnz skip\n\t"
8416 "movl $dst, -1\n"
8417 "skip:\n\t"
8418 "negl $dst\n\t"
8419 "addl $dst, 31" %}
8420 ins_encode %{
8421 Register Rdst = $dst$$Register;
8422 Register Rsrc = $src$$Register;
8423 Label skip;
8424 __ bsrl(Rdst, Rsrc);
8425 __ jccb(Assembler::notZero, skip);
8426 __ movl(Rdst, -1);
8427 __ bind(skip);
8428 __ negl(Rdst);
8429 __ addl(Rdst, BitsPerInt - 1);
8430 %}
8431 ins_pipe(ialu_reg);
8432 %}
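
// A worked example of the BSR fallback above: bsrl yields the index of
// the highest set bit, so CLZ(x) == 31 - bsr(x) for non-zero x. For
// src == 0x00008000, bsrl gives 15 and -(15) + 31 == 16 leading zeros;
// for src == 0 the ZF-guarded movl loads -1, and -(-1) + 31 == 32, the
// defined result for a zero input.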
8433
8434 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8435 predicate(UseCountLeadingZerosInstruction);
8436 match(Set dst (CountLeadingZerosL src));
8437 effect(KILL cr);
8438
8439 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8440 ins_encode %{
8441 __ lzcntq($dst$$Register, $src$$Register);
8442 %}
8443 ins_pipe(ialu_reg);
8444 %}
8445
8446 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8447 predicate(UseCountLeadingZerosInstruction);
8448 match(Set dst (CountLeadingZerosL (LoadL src)));
8449 effect(KILL cr);
8450 ins_cost(175);
8451 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8452 ins_encode %{
8453 __ lzcntq($dst$$Register, $src$$Address);
8454 %}
8455 ins_pipe(ialu_reg_mem);
8456 %}
8457
8458 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8459 predicate(!UseCountLeadingZerosInstruction);
8460 match(Set dst (CountLeadingZerosL src));
8461 effect(KILL cr);
8462
8463 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8464 "jnz skip\n\t"
8465 "movl $dst, -1\n"
8466 "skip:\n\t"
8467 "negl $dst\n\t"
8468 "addl $dst, 63" %}
8469 ins_encode %{
8470 Register Rdst = $dst$$Register;
8471 Register Rsrc = $src$$Register;
8472 Label skip;
8473 __ bsrq(Rdst, Rsrc);
8474 __ jccb(Assembler::notZero, skip);
8475 __ movl(Rdst, -1);
8476 __ bind(skip);
8477 __ negl(Rdst);
8478 __ addl(Rdst, BitsPerLong - 1);
8479 %}
8480 ins_pipe(ialu_reg);
8481 %}
8482
8483 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8484 predicate(UseCountTrailingZerosInstruction);
8485 match(Set dst (CountTrailingZerosI src));
8486 effect(KILL cr);
8487
8488 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8489 ins_encode %{
8490 __ tzcntl($dst$$Register, $src$$Register);
8491 %}
8492 ins_pipe(ialu_reg);
8493 %}
8494
8495 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8496 predicate(UseCountTrailingZerosInstruction);
8497 match(Set dst (CountTrailingZerosI (LoadI src)));
8498 effect(KILL cr);
8499 ins_cost(175);
8500 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8501 ins_encode %{
8502 __ tzcntl($dst$$Register, $src$$Address);
8503 %}
8504 ins_pipe(ialu_reg_mem);
8505 %}
8506
8507 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8508 predicate(!UseCountTrailingZerosInstruction);
8509 match(Set dst (CountTrailingZerosI src));
8510 effect(KILL cr);
8511
8512 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8513 "jnz done\n\t"
8514 "movl $dst, 32\n"
8515 "done:" %}
8516 ins_encode %{
8517 Register Rdst = $dst$$Register;
8518 Label done;
8519 __ bsfl(Rdst, $src$$Register);
8520 __ jccb(Assembler::notZero, done);
8521 __ movl(Rdst, BitsPerInt);
8522 __ bind(done);
8523 %}
8524 ins_pipe(ialu_reg);
8525 %}
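
// A worked example of the BSF fallback above: bsfl yields the index of
// the lowest set bit, which already equals the trailing-zero count for
// any non-zero input (e.g. src == 0x50 -> 4); only src == 0 needs the
// ZF-guarded movl of BitsPerInt (32).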
8526
8527 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8528 predicate(UseCountTrailingZerosInstruction);
8529 match(Set dst (CountTrailingZerosL src));
8530 effect(KILL cr);
8531
8532 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8533 ins_encode %{
8534 __ tzcntq($dst$$Register, $src$$Register);
8535 %}
8536 ins_pipe(ialu_reg);
8537 %}
8538
8539 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8540 predicate(UseCountTrailingZerosInstruction);
8541 match(Set dst (CountTrailingZerosL (LoadL src)));
8542 effect(KILL cr);
8543 ins_cost(175);
8544 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8545 ins_encode %{
8546 __ tzcntq($dst$$Register, $src$$Address);
8547 %}
8548 ins_pipe(ialu_reg_mem);
8549 %}
8550
8551 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8552 predicate(!UseCountTrailingZerosInstruction);
8553 match(Set dst (CountTrailingZerosL src));
8554 effect(KILL cr);
8555
8556 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8557 "jnz done\n\t"
8558 "movl $dst, 64\n"
8559 "done:" %}
8560 ins_encode %{
8561 Register Rdst = $dst$$Register;
8562 Label done;
8563 __ bsfq(Rdst, $src$$Register);
8564 __ jccb(Assembler::notZero, done);
8565 __ movl(Rdst, BitsPerLong);
8566 __ bind(done);
8567 %}
8568 ins_pipe(ialu_reg);
8569 %}
8570
8571 //--------------- Reverse Operation Instructions ----------------
8572 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8573 predicate(!VM_Version::supports_gfni());
8574 match(Set dst (ReverseI src));
8575 effect(TEMP dst, TEMP rtmp, KILL cr);
8576 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8577 ins_encode %{
8578 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8579 %}
8580 ins_pipe( ialu_reg );
8581 %}
8582
8583 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8584 predicate(VM_Version::supports_gfni());
8585 match(Set dst (ReverseI src));
8586 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8587 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8588 ins_encode %{
8589 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8590 %}
8591 ins_pipe( ialu_reg );
8592 %}
8593
8594 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8595 predicate(!VM_Version::supports_gfni());
8596 match(Set dst (ReverseL src));
8597 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8598 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8599 ins_encode %{
8600 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8601 %}
8602 ins_pipe( ialu_reg );
8603 %}
8604
8605 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8606 predicate(VM_Version::supports_gfni());
8607 match(Set dst (ReverseL src));
8608 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8609 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8610 ins_encode %{
8611 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8612 %}
8613 ins_pipe( ialu_reg );
8614 %}
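
// ReverseI/ReverseL implement Integer.reverse/Long.reverse (full bit
// reversal); without GFNI the MacroAssembler falls back to scalar code along
// the lines of the classic swap network. A 32-bit C sketch of the idea
// (illustrative only, not necessarily the exact sequence reverseI emits):
//   x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555); // swap adjacent bits
//   x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333); // swap bit pairs
//   x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F); // swap nibbles
//   x = __builtin_bswap32(x);                              // swap byte order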
8615
8616 //---------- Population Count Instructions -------------------------------------
8617
8618 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8619 predicate(UsePopCountInstruction);
8620 match(Set dst (PopCountI src));
8621 effect(KILL cr);
8622
8623 format %{ "popcnt $dst, $src" %}
8624 ins_encode %{
8625 __ popcntl($dst$$Register, $src$$Register);
8626 %}
8627 ins_pipe(ialu_reg);
8628 %}
8629
8630 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8631 predicate(UsePopCountInstruction);
8632 match(Set dst (PopCountI (LoadI mem)));
8633 effect(KILL cr);
8634
8635 format %{ "popcnt $dst, $mem" %}
8636 ins_encode %{
8637 __ popcntl($dst$$Register, $mem$$Address);
8638 %}
8639 ins_pipe(ialu_reg);
8640 %}
8641
8642 // Note: Long.bitCount(long) returns an int.
8643 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8644 predicate(UsePopCountInstruction);
8645 match(Set dst (PopCountL src));
8646 effect(KILL cr);
8647
8648 format %{ "popcnt $dst, $src" %}
8649 ins_encode %{
8650 __ popcntq($dst$$Register, $src$$Register);
8651 %}
8652 ins_pipe(ialu_reg);
8653 %}
8654
8655 // Note: Long.bitCount(long) returns an int.
8656 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8657 predicate(UsePopCountInstruction);
8658 match(Set dst (PopCountL (LoadL mem)));
8659 effect(KILL cr);
8660
8661 format %{ "popcnt $dst, $mem" %}
8662 ins_encode %{
8663 __ popcntq($dst$$Register, $mem$$Address);
8664 %}
8665 ins_pipe(ialu_reg);
8666 %}
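
// popcnt requires the POPCNT CPU feature; when UsePopCountInstruction is
// false the matcher rejects PopCount nodes and the bitCount intrinsic is not
// used. For reference, the portable bit-twiddling equivalent of popcntl on a
// 32-bit value (illustrative C only):
//   x = x - ((x >> 1) & 0x55555555);
//   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
//   x = (x + (x >> 4)) & 0x0F0F0F0F;
//   return (x * 0x01010101) >> 24;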
8667
8668
8669 //----------MemBar Instructions-----------------------------------------------
8670 // Memory barrier flavors
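//
// x86 is TSO (total store order): loads are not reordered with older loads,
// stores are not reordered with older stores, and stores become visible in
// program order. Only a store followed by a load of a different location can
// be reordered, so the acquire/release/storestore barriers below need no
// instruction and only the StoreLoad barrier (MemBarVolatile) emits code.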
8671
8672 instruct membar_acquire()
8673 %{
8674 match(MemBarAcquire);
8675 match(LoadFence);
8676 ins_cost(0);
8677
8678 size(0);
8679 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8680 ins_encode();
8681 ins_pipe(empty);
8682 %}
8683
8684 instruct membar_acquire_lock()
8685 %{
8686 match(MemBarAcquireLock);
8687 ins_cost(0);
8688
8689 size(0);
8690 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8691 ins_encode();
8692 ins_pipe(empty);
8693 %}
8694
8695 instruct membar_release()
8696 %{
8697 match(MemBarRelease);
8698 match(StoreFence);
8699 ins_cost(0);
8700
8701 size(0);
8702 format %{ "MEMBAR-release ! (empty encoding)" %}
8703 ins_encode();
8704 ins_pipe(empty);
8705 %}
8706
8707 instruct membar_release_lock()
8708 %{
8709 match(MemBarReleaseLock);
8710 ins_cost(0);
8711
8712 size(0);
8713 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8714 ins_encode();
8715 ins_pipe(empty);
8716 %}
8717
8718 instruct membar_volatile(rFlagsReg cr) %{
8719 match(MemBarVolatile);
8720 effect(KILL cr);
8721 ins_cost(400);
8722
8723 format %{
8724 $$template
8725 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8726 %}
8727 ins_encode %{
8728 __ membar(Assembler::StoreLoad);
8729 %}
8730 ins_pipe(pipe_slow);
8731 %}
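
// A locked read-modify-write of the top-of-stack word is a full fence
// (including StoreLoad) and is generally cheaper than mfence on modern
// processors, which is why membar(StoreLoad) emits "lock addl [rsp], 0" here.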
8732
8733 instruct unnecessary_membar_volatile()
8734 %{
8735 match(MemBarVolatile);
8736 predicate(Matcher::post_store_load_barrier(n));
8737 ins_cost(0);
8738
8739 size(0);
8740 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8741 ins_encode();
8742 ins_pipe(empty);
8743 %}
8744
8745 instruct membar_storestore() %{
8746 match(MemBarStoreStore);
8747 match(StoreStoreFence);
8748 ins_cost(0);
8749
8750 size(0);
8751 format %{ "MEMBAR-storestore (empty encoding)" %}
8752 ins_encode( );
8753 ins_pipe(empty);
8754 %}
8755
8756 //----------Move Instructions--------------------------------------------------
8757
8758 instruct castX2P(rRegP dst, rRegL src)
8759 %{
8760 match(Set dst (CastX2P src));
8761
8762 format %{ "movq $dst, $src\t# long->ptr" %}
8763 ins_encode %{
8764 if ($dst$$reg != $src$$reg) {
8765 __ movptr($dst$$Register, $src$$Register);
8766 }
8767 %}
8768 ins_pipe(ialu_reg_reg); // XXX
8769 %}
8770
8771 instruct castP2X(rRegL dst, rRegP src)
8772 %{
8773 match(Set dst (CastP2X src));
8774
8775 format %{ "movq $dst, $src\t# ptr -> long" %}
8776 ins_encode %{
8777 if ($dst$$reg != $src$$reg) {
8778 __ movptr($dst$$Register, $src$$Register);
8779 }
8780 %}
8781 ins_pipe(ialu_reg_reg); // XXX
8782 %}
8783
// Convert oop into int for vector alignment masking
8785 instruct convP2I(rRegI dst, rRegP src)
8786 %{
8787 match(Set dst (ConvL2I (CastP2X src)));
8788
8789 format %{ "movl $dst, $src\t# ptr -> int" %}
8790 ins_encode %{
8791 __ movl($dst$$Register, $src$$Register);
8792 %}
8793 ins_pipe(ialu_reg_reg); // XXX
8794 %}
8795
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4Gb).
8798 instruct convN2I(rRegI dst, rRegN src)
8799 %{
8800 predicate(CompressedOops::shift() == 0);
8801 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8802
8803 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8804 ins_encode %{
8805 __ movl($dst$$Register, $src$$Register);
8806 %}
8807 ins_pipe(ialu_reg_reg); // XXX
8808 %}
8809
8810 // Convert oop pointer into compressed form
8811 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8812 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8813 match(Set dst (EncodeP src));
8814 effect(KILL cr);
8815 format %{ "encode_heap_oop $dst,$src" %}
8816 ins_encode %{
8817 Register s = $src$$Register;
8818 Register d = $dst$$Register;
8819 if (s != d) {
8820 __ movq(d, s);
8821 }
8822 __ encode_heap_oop(d);
8823 %}
8824 ins_pipe(ialu_reg_long);
8825 %}
8826
8827 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8828 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8829 match(Set dst (EncodeP src));
8830 effect(KILL cr);
8831 format %{ "encode_heap_oop_not_null $dst,$src" %}
8832 ins_encode %{
8833 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8834 %}
8835 ins_pipe(ialu_reg_long);
8836 %}
8837
8838 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8839 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8840 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8841 match(Set dst (DecodeN src));
8842 effect(KILL cr);
8843 format %{ "decode_heap_oop $dst,$src" %}
8844 ins_encode %{
8845 Register s = $src$$Register;
8846 Register d = $dst$$Register;
8847 if (s != d) {
8848 __ movq(d, s);
8849 }
8850 __ decode_heap_oop(d);
8851 %}
8852 ins_pipe(ialu_reg_long);
8853 %}
8854
8855 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8856 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8857 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8858 match(Set dst (DecodeN src));
8859 effect(KILL cr);
8860 format %{ "decode_heap_oop_not_null $dst,$src" %}
8861 ins_encode %{
8862 Register s = $src$$Register;
8863 Register d = $dst$$Register;
8864 if (s != d) {
8865 __ decode_heap_oop_not_null(d, s);
8866 } else {
8867 __ decode_heap_oop_not_null(d);
8868 }
8869 %}
8870 ins_pipe(ialu_reg_long);
8871 %}
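
// Compressed oops encode a 64-bit oop as a 32-bit value relative to the heap
// base:  narrow = (oop - base) >> shift  and  oop = base + (narrow << shift),
// where shift is LogMinObjAlignmentInBytes (usually 3, or 0 for small heaps).
// The not-null variants can skip the null check that the general encode and
// decode must perform, since null must stay null across the conversion.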
8872
8873 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8874 match(Set dst (EncodePKlass src));
8875 effect(TEMP dst, KILL cr);
8876 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8877 ins_encode %{
8878 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8879 %}
8880 ins_pipe(ialu_reg_long);
8881 %}
8882
8883 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8884 match(Set dst (DecodeNKlass src));
8885 effect(TEMP dst, KILL cr);
8886 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8887 ins_encode %{
8888 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8889 %}
8890 ins_pipe(ialu_reg_long);
8891 %}
8892
8893 //----------Conditional Move---------------------------------------------------
8894 // Jump
8895 // dummy instruction for generating temp registers
8896 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8897 match(Jump (LShiftL switch_val shift));
8898 ins_cost(350);
8899 predicate(false);
8900 effect(TEMP dest);
8901
8902 format %{ "leaq $dest, [$constantaddress]\n\t"
8903 "jmp [$dest + $switch_val << $shift]\n\t" %}
8904 ins_encode %{
// We could use jump(ArrayAddress), except that the macro assembler needs r10
// to do that and the compiler treats r10 as an allocatable register.
// So we build the address by hand.
8908 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
8909 // ArrayAddress dispatch(table, index);
8910 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
8911 __ lea($dest$$Register, $constantaddress);
8912 __ jmp(dispatch);
8913 %}
8914 ins_pipe(pipe_jmp);
8915 %}
8916
8917 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8918 match(Jump (AddL (LShiftL switch_val shift) offset));
8919 ins_cost(350);
8920 effect(TEMP dest);
8921
8922 format %{ "leaq $dest, [$constantaddress]\n\t"
8923 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
8924 ins_encode %{
// We could use jump(ArrayAddress), except that the macro assembler needs r10
// to do that and the compiler treats r10 as an allocatable register.
// So we build the address by hand.
8928 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8929 // ArrayAddress dispatch(table, index);
8930 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8931 __ lea($dest$$Register, $constantaddress);
8932 __ jmp(dispatch);
8933 %}
8934 ins_pipe(pipe_jmp);
8935 %}
8936
8937 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
8938 match(Jump switch_val);
8939 ins_cost(350);
8940 effect(TEMP dest);
8941
8942 format %{ "leaq $dest, [$constantaddress]\n\t"
8943 "jmp [$dest + $switch_val]\n\t" %}
8944 ins_encode %{
// We could use jump(ArrayAddress), except that the macro assembler needs r10
// to do that and the compiler treats r10 as an allocatable register.
// So we build the address by hand.
8948 // Address index(noreg, switch_reg, Address::times_1);
8949 // ArrayAddress dispatch(table, index);
8950 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
8951 __ lea($dest$$Register, $constantaddress);
8952 __ jmp(dispatch);
8953 %}
8954 ins_pipe(pipe_jmp);
8955 %}
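
// The rules above implement jump-table dispatch (tableswitch): $dest is
// loaded with the address of a table of code addresses placed in the constant
// section, and the indirect jmp transfers to, roughly,
//   target = *(table + (switch_val << shift) + offset)
// with the _offset/_addr variants folding the scaling and displacement into
// the addressing mode.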
8956
8957 // Conditional move
8958 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
8959 %{
8960 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
8961 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
8962
8963 ins_cost(100); // XXX
8964 format %{ "setbn$cop $dst\t# signed, int" %}
8965 ins_encode %{
8966 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
8967 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
8968 %}
8969 ins_pipe(ialu_reg);
8970 %}
8971
8972 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
8973 %{
8974 predicate(!UseAPX);
8975 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
8976
8977 ins_cost(200); // XXX
8978 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
8979 ins_encode %{
8980 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
8981 %}
8982 ins_pipe(pipe_cmov_reg);
8983 %}
8984
8985 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
8986 %{
8987 predicate(UseAPX);
8988 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
8989
8990 ins_cost(200);
8991 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
8992 ins_encode %{
8993 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
8994 %}
8995 ins_pipe(pipe_cmov_reg);
8996 %}
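
// APX NDD (new data destination) forms: with UseAPX the EVEX-encoded ecmov
// takes a separate destination register, turning the classic two-operand
// "cmov dst, src" (which must read-modify-write dst) into a three-operand
// "ecmov dst, src1, src2" and saving the register-to-register move that the
// two-operand form often needs to position its inputs.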
8997
8998 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
8999 %{
9000 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9001 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9002
9003 ins_cost(100); // XXX
9004 format %{ "setbn$cop $dst\t# unsigned, int" %}
9005 ins_encode %{
9006 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9007 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9008 %}
9009 ins_pipe(ialu_reg);
9010 %}
9011
9012 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9013 predicate(!UseAPX);
9014 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9015
9016 ins_cost(200); // XXX
9017 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9018 ins_encode %{
9019 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9020 %}
9021 ins_pipe(pipe_cmov_reg);
9022 %}
9023
9024 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9025 predicate(UseAPX);
9026 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9027
9028 ins_cost(200);
9029 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9030 ins_encode %{
9031 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9032 %}
9033 ins_pipe(pipe_cmov_reg);
9034 %}
9035
9036 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9037 %{
9038 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9039 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9040
9041 ins_cost(100); // XXX
9042 format %{ "setbn$cop $dst\t# unsigned, int" %}
9043 ins_encode %{
9044 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9045 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9046 %}
9047 ins_pipe(ialu_reg);
9048 %}
9049
9050 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9051 predicate(!UseAPX);
9052 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9053 ins_cost(200);
9054 expand %{
9055 cmovI_regU(cop, cr, dst, src);
9056 %}
9057 %}
9058
9059 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9060 predicate(UseAPX);
9061 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9062 ins_cost(200);
9063 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9064 ins_encode %{
9065 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9066 %}
9067 ins_pipe(pipe_cmov_reg);
9068 %}
9069
9070 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9071 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9072 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9073
9074 ins_cost(200); // XXX
9075 format %{ "cmovpl $dst, $src\n\t"
9076 "cmovnel $dst, $src" %}
9077 ins_encode %{
9078 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9079 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9080 %}
9081 ins_pipe(pipe_cmov_reg);
9082 %}
9083
9084 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9085 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9086 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9087 effect(TEMP dst);
9088
9089 ins_cost(200);
9090 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9091 "cmovnel $dst, $src2" %}
9092 ins_encode %{
9093 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9094 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9095 %}
9096 ins_pipe(pipe_cmov_reg);
9097 %}
9098
9099 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9100 // inputs of the CMove
9101 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9102 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9103 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9104 effect(TEMP dst);
9105
9106 ins_cost(200); // XXX
9107 format %{ "cmovpl $dst, $src\n\t"
9108 "cmovnel $dst, $src" %}
9109 ins_encode %{
9110 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9111 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9112 %}
9113 ins_pipe(pipe_cmov_reg);
9114 %}
9115
// This special handling is needed only for eq/ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
9118 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9119 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9120 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9121 effect(TEMP dst);
9122
9123 ins_cost(200);
9124 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9125 "cmovnel $dst, $src2" %}
9126 ins_encode %{
9127 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9128 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9129 %}
9130 ins_pipe(pipe_cmov_reg);
9131 %}
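
// The cmovp + cmovne pair implements "move if unordered or not-equal":
// ucomiss/ucomisd set ZF, PF and CF all to 1 for an unordered comparison, so
// a Java x != y test must also take the move when PF is set (NaN != NaN is
// true), and the flipped-input eq forms reuse the same two-move sequence.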
9132
9133 // Conditional move
9134 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9135 predicate(!UseAPX);
9136 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9137
9138 ins_cost(250); // XXX
9139 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9140 ins_encode %{
9141 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9142 %}
9143 ins_pipe(pipe_cmov_mem);
9144 %}
9145
9146 // Conditional move
9147 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9148 %{
9149 predicate(UseAPX);
9150 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9151
9152 ins_cost(250);
9153 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9154 ins_encode %{
9155 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9156 %}
9157 ins_pipe(pipe_cmov_mem);
9158 %}
9159
9160 // Conditional move
9161 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9162 %{
9163 predicate(!UseAPX);
9164 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9165
9166 ins_cost(250); // XXX
9167 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9168 ins_encode %{
9169 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9170 %}
9171 ins_pipe(pipe_cmov_mem);
9172 %}
9173
9174 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9175 predicate(!UseAPX);
9176 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9177 ins_cost(250);
9178 expand %{
9179 cmovI_memU(cop, cr, dst, src);
9180 %}
9181 %}
9182
9183 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9184 %{
9185 predicate(UseAPX);
9186 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9187
9188 ins_cost(250);
9189 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9190 ins_encode %{
9191 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9192 %}
9193 ins_pipe(pipe_cmov_mem);
9194 %}
9195
9196 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9197 %{
9198 predicate(UseAPX);
9199 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9200 ins_cost(250);
9201 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9202 ins_encode %{
9203 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9204 %}
9205 ins_pipe(pipe_cmov_mem);
9206 %}
9207
9208 // Conditional move
9209 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9210 %{
9211 predicate(!UseAPX);
9212 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9213
9214 ins_cost(200); // XXX
9215 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9216 ins_encode %{
9217 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9218 %}
9219 ins_pipe(pipe_cmov_reg);
9220 %}
9221
9222 // Conditional move ndd
9223 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9224 %{
9225 predicate(UseAPX);
9226 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9227
9228 ins_cost(200);
9229 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9230 ins_encode %{
9231 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9232 %}
9233 ins_pipe(pipe_cmov_reg);
9234 %}
9235
9236 // Conditional move
9237 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9238 %{
9239 predicate(!UseAPX);
9240 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9241
9242 ins_cost(200); // XXX
9243 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9244 ins_encode %{
9245 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9246 %}
9247 ins_pipe(pipe_cmov_reg);
9248 %}
9249
9250 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9251 predicate(!UseAPX);
9252 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9253 ins_cost(200);
9254 expand %{
9255 cmovN_regU(cop, cr, dst, src);
9256 %}
9257 %}
9258
9259 // Conditional move ndd
9260 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9261 %{
9262 predicate(UseAPX);
9263 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9264
9265 ins_cost(200);
9266 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9267 ins_encode %{
9268 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9269 %}
9270 ins_pipe(pipe_cmov_reg);
9271 %}
9272
9273 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9274 predicate(UseAPX);
9275 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9276 ins_cost(200);
9277 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9278 ins_encode %{
9279 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9280 %}
9281 ins_pipe(pipe_cmov_reg);
9282 %}
9283
9284 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9285 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9286 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9287
9288 ins_cost(200); // XXX
9289 format %{ "cmovpl $dst, $src\n\t"
9290 "cmovnel $dst, $src" %}
9291 ins_encode %{
9292 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9293 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9294 %}
9295 ins_pipe(pipe_cmov_reg);
9296 %}
9297
9298 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9299 // inputs of the CMove
9300 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9301 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9302 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9303
9304 ins_cost(200); // XXX
9305 format %{ "cmovpl $dst, $src\n\t"
9306 "cmovnel $dst, $src" %}
9307 ins_encode %{
9308 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9309 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9310 %}
9311 ins_pipe(pipe_cmov_reg);
9312 %}
9313
9314 // Conditional move
9315 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9316 %{
9317 predicate(!UseAPX);
9318 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9319
9320 ins_cost(200); // XXX
9321 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9322 ins_encode %{
9323 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9324 %}
9325 ins_pipe(pipe_cmov_reg); // XXX
9326 %}
9327
9328 // Conditional move ndd
9329 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9330 %{
9331 predicate(UseAPX);
9332 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9333
9334 ins_cost(200);
9335 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9336 ins_encode %{
9337 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9338 %}
9339 ins_pipe(pipe_cmov_reg);
9340 %}
9341
9342 // Conditional move
9343 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9344 %{
9345 predicate(!UseAPX);
9346 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9347
9348 ins_cost(200); // XXX
9349 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9350 ins_encode %{
9351 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9352 %}
9353 ins_pipe(pipe_cmov_reg); // XXX
9354 %}
9355
9356 // Conditional move ndd
9357 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9358 %{
9359 predicate(UseAPX);
9360 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9361
9362 ins_cost(200);
9363 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9364 ins_encode %{
9365 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9366 %}
9367 ins_pipe(pipe_cmov_reg);
9368 %}
9369
9370 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9371 predicate(!UseAPX);
9372 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9373 ins_cost(200);
9374 expand %{
9375 cmovP_regU(cop, cr, dst, src);
9376 %}
9377 %}
9378
9379 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9380 predicate(UseAPX);
9381 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9382 ins_cost(200);
9383 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9384 ins_encode %{
9385 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9386 %}
9387 ins_pipe(pipe_cmov_reg);
9388 %}
9389
9390 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9391 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9392 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9393
9394 ins_cost(200); // XXX
9395 format %{ "cmovpq $dst, $src\n\t"
9396 "cmovneq $dst, $src" %}
9397 ins_encode %{
9398 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9399 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9400 %}
9401 ins_pipe(pipe_cmov_reg);
9402 %}
9403
9404 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9405 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9406 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9407 effect(TEMP dst);
9408
9409 ins_cost(200);
9410 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9411 "cmovneq $dst, $src2" %}
9412 ins_encode %{
9413 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9414 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9415 %}
9416 ins_pipe(pipe_cmov_reg);
9417 %}
9418
9419 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9420 // inputs of the CMove
9421 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9422 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9423 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9424
9425 ins_cost(200); // XXX
9426 format %{ "cmovpq $dst, $src\n\t"
9427 "cmovneq $dst, $src" %}
9428 ins_encode %{
9429 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9430 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9431 %}
9432 ins_pipe(pipe_cmov_reg);
9433 %}
9434
9435 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9436 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9437 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9438 effect(TEMP dst);
9439
9440 ins_cost(200);
9441 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9442 "cmovneq $dst, $src2" %}
9443 ins_encode %{
9444 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9445 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9446 %}
9447 ins_pipe(pipe_cmov_reg);
9448 %}
9449
9450 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9451 %{
9452 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9453 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9454
9455 ins_cost(100); // XXX
9456 format %{ "setbn$cop $dst\t# signed, long" %}
9457 ins_encode %{
9458 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9459 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9460 %}
9461 ins_pipe(ialu_reg);
9462 %}
9463
9464 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9465 %{
9466 predicate(!UseAPX);
9467 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9468
9469 ins_cost(200); // XXX
9470 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9471 ins_encode %{
9472 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9473 %}
9474 ins_pipe(pipe_cmov_reg); // XXX
9475 %}
9476
9477 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9478 %{
9479 predicate(UseAPX);
9480 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9481
9482 ins_cost(200);
9483 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9484 ins_encode %{
9485 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9486 %}
9487 ins_pipe(pipe_cmov_reg);
9488 %}
9489
9490 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9491 %{
9492 predicate(!UseAPX);
9493 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9494
9495 ins_cost(200); // XXX
9496 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9497 ins_encode %{
9498 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9499 %}
9500 ins_pipe(pipe_cmov_mem); // XXX
9501 %}
9502
9503 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9504 %{
9505 predicate(UseAPX);
9506 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9507
9508 ins_cost(200);
9509 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9510 ins_encode %{
9511 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9512 %}
9513 ins_pipe(pipe_cmov_mem);
9514 %}
9515
9516 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9517 %{
9518 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9519 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9520
9521 ins_cost(100); // XXX
9522 format %{ "setbn$cop $dst\t# unsigned, long" %}
9523 ins_encode %{
9524 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9525 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9526 %}
9527 ins_pipe(ialu_reg);
9528 %}
9529
9530 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9531 %{
9532 predicate(!UseAPX);
9533 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9534
9535 ins_cost(200); // XXX
9536 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9537 ins_encode %{
9538 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9539 %}
9540 ins_pipe(pipe_cmov_reg); // XXX
9541 %}
9542
9543 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9544 %{
9545 predicate(UseAPX);
9546 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9547
9548 ins_cost(200);
9549 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9550 ins_encode %{
9551 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9552 %}
9553 ins_pipe(pipe_cmov_reg);
9554 %}
9555
9556 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9557 %{
9558 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9559 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9560
9561 ins_cost(100); // XXX
9562 format %{ "setbn$cop $dst\t# unsigned, long" %}
9563 ins_encode %{
9564 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9565 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9566 %}
9567 ins_pipe(ialu_reg);
9568 %}
9569
9570 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9571 predicate(!UseAPX);
9572 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9573 ins_cost(200);
9574 expand %{
9575 cmovL_regU(cop, cr, dst, src);
9576 %}
9577 %}
9578
9579 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9580 %{
9581 predicate(UseAPX);
9582 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9583 ins_cost(200);
9584 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9585 ins_encode %{
9586 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9587 %}
9588 ins_pipe(pipe_cmov_reg);
9589 %}
9590
9591 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9592 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9593 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9594
9595 ins_cost(200); // XXX
9596 format %{ "cmovpq $dst, $src\n\t"
9597 "cmovneq $dst, $src" %}
9598 ins_encode %{
9599 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9600 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9601 %}
9602 ins_pipe(pipe_cmov_reg);
9603 %}
9604
9605 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9606 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9607 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9608 effect(TEMP dst);
9609
9610 ins_cost(200);
9611 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9612 "cmovneq $dst, $src2" %}
9613 ins_encode %{
9614 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9615 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9616 %}
9617 ins_pipe(pipe_cmov_reg);
9618 %}
9619
9620 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9621 // inputs of the CMove
9622 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9623 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9624 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9625
9626 ins_cost(200); // XXX
9627 format %{ "cmovpq $dst, $src\n\t"
9628 "cmovneq $dst, $src" %}
9629 ins_encode %{
9630 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9631 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9632 %}
9633 ins_pipe(pipe_cmov_reg);
9634 %}
9635
9636 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9637 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9638 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9639 effect(TEMP dst);
9640
9641 ins_cost(200);
9642 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9643 "cmovneq $dst, $src2" %}
9644 ins_encode %{
9645 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9646 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9647 %}
9648 ins_pipe(pipe_cmov_reg);
9649 %}
9650
9651 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9652 %{
9653 predicate(!UseAPX);
9654 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9655
9656 ins_cost(200); // XXX
9657 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9658 ins_encode %{
9659 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9660 %}
9661 ins_pipe(pipe_cmov_mem); // XXX
9662 %}
9663
9664 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9665 predicate(!UseAPX);
9666 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9667 ins_cost(200);
9668 expand %{
9669 cmovL_memU(cop, cr, dst, src);
9670 %}
9671 %}
9672
9673 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9674 %{
9675 predicate(UseAPX);
9676 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9677
9678 ins_cost(200);
9679 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9680 ins_encode %{
9681 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9682 %}
9683 ins_pipe(pipe_cmov_mem);
9684 %}
9685
9686 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9687 %{
9688 predicate(UseAPX);
9689 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9690 ins_cost(200);
9691 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9692 ins_encode %{
9693 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9694 %}
9695 ins_pipe(pipe_cmov_mem);
9696 %}
9697
9698 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9699 %{
9700 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9701
9702 ins_cost(200); // XXX
9703 format %{ "jn$cop skip\t# signed cmove float\n\t"
9704 "movss $dst, $src\n"
9705 "skip:" %}
9706 ins_encode %{
9707 Label Lskip;
9708 // Invert sense of branch from sense of CMOV
9709 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9710 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9711 __ bind(Lskip);
9712 %}
9713 ins_pipe(pipe_slow);
9714 %}
9715
9716 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9717 %{
9718 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9719
9720 ins_cost(200); // XXX
9721 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9722 "movss $dst, $src\n"
9723 "skip:" %}
9724 ins_encode %{
9725 Label Lskip;
9726 // Invert sense of branch from sense of CMOV
9727 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9728 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9729 __ bind(Lskip);
9730 %}
9731 ins_pipe(pipe_slow);
9732 %}
9733
9734 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9735 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9736 ins_cost(200);
9737 expand %{
9738 cmovF_regU(cop, cr, dst, src);
9739 %}
9740 %}
9741
9742 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9743 %{
9744 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9745
9746 ins_cost(200); // XXX
9747 format %{ "jn$cop skip\t# signed cmove double\n\t"
9748 "movsd $dst, $src\n"
9749 "skip:" %}
9750 ins_encode %{
9751 Label Lskip;
9752 // Invert sense of branch from sense of CMOV
9753 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9754 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9755 __ bind(Lskip);
9756 %}
9757 ins_pipe(pipe_slow);
9758 %}
9759
9760 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9761 %{
9762 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9763
9764 ins_cost(200); // XXX
9765 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9766 "movsd $dst, $src\n"
9767 "skip:" %}
9768 ins_encode %{
9769 Label Lskip;
9770 // Invert sense of branch from sense of CMOV
9771 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9772 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9773 __ bind(Lskip);
9774 %}
9775 ins_pipe(pipe_slow);
9776 %}
9777
9778 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9779 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9780 ins_cost(200);
9781 expand %{
9782 cmovD_regU(cop, cr, dst, src);
9783 %}
9784 %}
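
// There is no conditional move for XMM registers (cmovcc only operates on
// general-purpose registers), so the float/double CMove rules above branch
// around a register-to-register move instead; the branch tests the inverted
// condition so that the move is skipped exactly when $dst should be kept.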
9785
9786 //----------Arithmetic Instructions--------------------------------------------
9787 //----------Addition Instructions----------------------------------------------
9788
9789 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9790 %{
9791 predicate(!UseAPX);
9792 match(Set dst (AddI dst src));
9793 effect(KILL cr);
9794 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9795 format %{ "addl $dst, $src\t# int" %}
9796 ins_encode %{
9797 __ addl($dst$$Register, $src$$Register);
9798 %}
9799 ins_pipe(ialu_reg_reg);
9800 %}
9801
9802 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9803 %{
9804 predicate(UseAPX);
9805 match(Set dst (AddI src1 src2));
9806 effect(KILL cr);
9807 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9808
9809 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9810 ins_encode %{
9811 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9812 %}
9813 ins_pipe(ialu_reg_reg);
9814 %}
9815
9816 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9817 %{
9818 predicate(!UseAPX);
9819 match(Set dst (AddI dst src));
9820 effect(KILL cr);
9821 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9822
9823 format %{ "addl $dst, $src\t# int" %}
9824 ins_encode %{
9825 __ addl($dst$$Register, $src$$constant);
9826 %}
9827 ins_pipe( ialu_reg );
9828 %}
9829
9830 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9831 %{
9832 predicate(UseAPX);
9833 match(Set dst (AddI src1 src2));
9834 effect(KILL cr);
9835 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9836
9837 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9838 ins_encode %{
9839 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9840 %}
9841 ins_pipe( ialu_reg );
9842 %}
9843
9844 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9845 %{
9846 predicate(UseAPX);
9847 match(Set dst (AddI (LoadI src1) src2));
9848 effect(KILL cr);
9849 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9850
9851 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9852 ins_encode %{
9853 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9854 %}
9855 ins_pipe( ialu_reg );
9856 %}
9857
9858 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9859 %{
9860 predicate(!UseAPX);
9861 match(Set dst (AddI dst (LoadI src)));
9862 effect(KILL cr);
9863 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9864
9865 ins_cost(150); // XXX
9866 format %{ "addl $dst, $src\t# int" %}
9867 ins_encode %{
9868 __ addl($dst$$Register, $src$$Address);
9869 %}
9870 ins_pipe(ialu_reg_mem);
9871 %}
9872
9873 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9874 %{
9875 predicate(UseAPX);
9876 match(Set dst (AddI src1 (LoadI src2)));
9877 effect(KILL cr);
9878 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9879
9880 ins_cost(150);
9881 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9882 ins_encode %{
9883 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9884 %}
9885 ins_pipe(ialu_reg_mem);
9886 %}
9887
9888 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9889 %{
9890 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9891 effect(KILL cr);
9892 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9893
9894 ins_cost(150); // XXX
9895 format %{ "addl $dst, $src\t# int" %}
9896 ins_encode %{
9897 __ addl($dst$$Address, $src$$Register);
9898 %}
9899 ins_pipe(ialu_mem_reg);
9900 %}
9901
9902 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9903 %{
9904 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9905 effect(KILL cr);
flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

9909 ins_cost(125); // XXX
9910 format %{ "addl $dst, $src\t# int" %}
9911 ins_encode %{
9912 __ addl($dst$$Address, $src$$constant);
9913 %}
9914 ins_pipe(ialu_mem_imm);
9915 %}
9916
9917 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
9918 %{
9919 predicate(!UseAPX && UseIncDec);
9920 match(Set dst (AddI dst src));
9921 effect(KILL cr);
9922
9923 format %{ "incl $dst\t# int" %}
9924 ins_encode %{
9925 __ incrementl($dst$$Register);
9926 %}
9927 ins_pipe(ialu_reg);
9928 %}
9929
9930 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
9931 %{
9932 predicate(UseAPX && UseIncDec);
9933 match(Set dst (AddI src val));
9934 effect(KILL cr);
9935
9936 format %{ "eincl $dst, $src\t# int ndd" %}
9937 ins_encode %{
9938 __ eincl($dst$$Register, $src$$Register, false);
9939 %}
9940 ins_pipe(ialu_reg);
9941 %}
9942
9943 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
9944 %{
9945 predicate(UseAPX && UseIncDec);
9946 match(Set dst (AddI (LoadI src) val));
9947 effect(KILL cr);
9948
9949 format %{ "eincl $dst, $src\t# int ndd" %}
9950 ins_encode %{
9951 __ eincl($dst$$Register, $src$$Address, false);
9952 %}
9953 ins_pipe(ialu_reg);
9954 %}
9955
9956 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
9957 %{
9958 predicate(UseIncDec);
9959 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9960 effect(KILL cr);
9961
9962 ins_cost(125); // XXX
9963 format %{ "incl $dst\t# int" %}
9964 ins_encode %{
9965 __ incrementl($dst$$Address);
9966 %}
9967 ins_pipe(ialu_mem_imm);
9968 %}
9969
// Matches AddI because the ideal graph canonicalizes "x - 1" into "x + (-1)".
9971 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
9972 %{
9973 predicate(!UseAPX && UseIncDec);
9974 match(Set dst (AddI dst src));
9975 effect(KILL cr);
9976
9977 format %{ "decl $dst\t# int" %}
9978 ins_encode %{
9979 __ decrementl($dst$$Register);
9980 %}
9981 ins_pipe(ialu_reg);
9982 %}
9983
9984 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
9985 %{
9986 predicate(UseAPX && UseIncDec);
9987 match(Set dst (AddI src val));
9988 effect(KILL cr);
9989
9990 format %{ "edecl $dst, $src\t# int ndd" %}
9991 ins_encode %{
9992 __ edecl($dst$$Register, $src$$Register, false);
9993 %}
9994 ins_pipe(ialu_reg);
9995 %}
9996
9997 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
9998 %{
9999 predicate(UseAPX && UseIncDec);
10000 match(Set dst (AddI (LoadI src) val));
10001 effect(KILL cr);
10002
10003 format %{ "edecl $dst, $src\t# int ndd" %}
10004 ins_encode %{
10005 __ edecl($dst$$Register, $src$$Address, false);
10006 %}
10007 ins_pipe(ialu_reg);
10008 %}
10009
// Matches AddI because the ideal graph canonicalizes "x - 1" into "x + (-1)".
10011 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10012 %{
10013 predicate(UseIncDec);
10014 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10015 effect(KILL cr);
10016
10017 ins_cost(125); // XXX
10018 format %{ "decl $dst\t# int" %}
10019 ins_encode %{
10020 __ decrementl($dst$$Address);
10021 %}
10022 ins_pipe(ialu_mem_imm);
10023 %}
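
// inc/dec leave CF unchanged (unlike add/sub), which is presumably why the
// inc/dec rules above carry no flag(...) annotations; UseIncDec lets
// platforms that suffer partial-flag stalls prefer the plain add/sub forms
// while others get the shorter inc/dec encodings.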
10024
10025 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10026 %{
10027 predicate(VM_Version::supports_fast_2op_lea());
10028 match(Set dst (AddI (LShiftI index scale) disp));
10029
10030 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10031 ins_encode %{
10032 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10033 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10034 %}
10035 ins_pipe(ialu_reg_reg);
10036 %}
10037
10038 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10039 %{
10040 predicate(VM_Version::supports_fast_3op_lea());
10041 match(Set dst (AddI (AddI base index) disp));
10042
10043 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10044 ins_encode %{
10045 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10046 %}
10047 ins_pipe(ialu_reg_reg);
10048 %}
10049
10050 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10051 %{
10052 predicate(VM_Version::supports_fast_2op_lea());
10053 match(Set dst (AddI base (LShiftI index scale)));
10054
10055 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10056 ins_encode %{
10057 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10058 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10059 %}
10060 ins_pipe(ialu_reg_reg);
10061 %}
10062
10063 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10064 %{
10065 predicate(VM_Version::supports_fast_3op_lea());
10066 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10067
10068 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10069 ins_encode %{
10070 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10071 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10072 %}
10073 ins_pipe(ialu_reg_reg);
10074 %}
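
// The lea rules above fold shift-and-add arithmetic into one instruction and,
// unlike add/shift, do not write the flags (note the absence of a KILL cr
// effect). For example, "leal dst, [base + index << 2 + 16]" computes
//   dst = base + (index << 2) + 16
// in a single ALU operation.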
10075
10076 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10077 %{
10078 predicate(!UseAPX);
10079 match(Set dst (AddL dst src));
10080 effect(KILL cr);
10081 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10082
10083 format %{ "addq $dst, $src\t# long" %}
10084 ins_encode %{
10085 __ addq($dst$$Register, $src$$Register);
10086 %}
10087 ins_pipe(ialu_reg_reg);
10088 %}
10089
10090 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10091 %{
10092 predicate(UseAPX);
10093 match(Set dst (AddL src1 src2));
10094 effect(KILL cr);
10095 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10096
10097 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10098 ins_encode %{
10099 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10100 %}
10101 ins_pipe(ialu_reg_reg);
10102 %}
10103
10104 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10105 %{
10106 predicate(!UseAPX);
10107 match(Set dst (AddL dst src));
10108 effect(KILL cr);
10109 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10110
10111 format %{ "addq $dst, $src\t# long" %}
10112 ins_encode %{
10113 __ addq($dst$$Register, $src$$constant);
10114 %}
10115 ins_pipe( ialu_reg );
10116 %}
10117
10118 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10119 %{
10120 predicate(UseAPX);
10121 match(Set dst (AddL src1 src2));
10122 effect(KILL cr);
10123 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10124
10125 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10126 ins_encode %{
10127 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10128 %}
10129 ins_pipe( ialu_reg );
10130 %}
10131
10132 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10133 %{
10134 predicate(UseAPX);
10135 match(Set dst (AddL (LoadL src1) src2));
10136 effect(KILL cr);
10137 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10138
10139 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10140 ins_encode %{
10141 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10142 %}
10143 ins_pipe( ialu_reg );
10144 %}
10145
10146 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10147 %{
10148 predicate(!UseAPX);
10149 match(Set dst (AddL dst (LoadL src)));
10150 effect(KILL cr);
10151 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10152
10153 ins_cost(150); // XXX
10154 format %{ "addq $dst, $src\t# long" %}
10155 ins_encode %{
10156 __ addq($dst$$Register, $src$$Address);
10157 %}
10158 ins_pipe(ialu_reg_mem);
10159 %}
10160
10161 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10162 %{
10163 predicate(UseAPX);
10164 match(Set dst (AddL src1 (LoadL src2)));
10165 effect(KILL cr);
10166 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10167
10168 ins_cost(150);
10169 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10170 ins_encode %{
10171 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10172 %}
10173 ins_pipe(ialu_reg_mem);
10174 %}
10175
10176 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10177 %{
10178 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10179 effect(KILL cr);
10180 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10181
10182 ins_cost(150); // XXX
10183 format %{ "addq $dst, $src\t# long" %}
10184 ins_encode %{
10185 __ addq($dst$$Address, $src$$Register);
10186 %}
10187 ins_pipe(ialu_mem_reg);
10188 %}
10189
10190 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10191 %{
10192 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10193 effect(KILL cr);
10194 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10195
10196 ins_cost(125); // XXX
10197 format %{ "addq $dst, $src\t# long" %}
10198 ins_encode %{
10199 __ addq($dst$$Address, $src$$constant);
10200 %}
10201 ins_pipe(ialu_mem_imm);
10202 %}
10203
10204 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10205 %{
10206 predicate(!UseAPX && UseIncDec);
10207 match(Set dst (AddL dst src));
10208 effect(KILL cr);
10209
10210 format %{ "incq $dst\t# long" %}
10211 ins_encode %{
10212 __ incrementq($dst$$Register);
10213 %}
10214 ins_pipe(ialu_reg);
10215 %}
10216
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10218 %{
10219 predicate(UseAPX && UseIncDec);
10220 match(Set dst (AddL src val));
10221 effect(KILL cr);
10222
10223 format %{ "eincq $dst, $src\t# long ndd" %}
10224 ins_encode %{
10225 __ eincq($dst$$Register, $src$$Register, false);
10226 %}
10227 ins_pipe(ialu_reg);
10228 %}
10229
10230 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10231 %{
10232 predicate(UseAPX && UseIncDec);
10233 match(Set dst (AddL (LoadL src) val));
10234 effect(KILL cr);
10235
10236 format %{ "eincq $dst, $src\t# long ndd" %}
10237 ins_encode %{
10238 __ eincq($dst$$Register, $src$$Address, false);
10239 %}
10240 ins_pipe(ialu_reg);
10241 %}
10242
10243 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10244 %{
10245 predicate(UseIncDec);
10246 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10247 effect(KILL cr);
10248
10249 ins_cost(125); // XXX
10250 format %{ "incq $dst\t# long" %}
10251 ins_encode %{
10252 __ incrementq($dst$$Address);
10253 %}
10254 ins_pipe(ialu_mem_imm);
10255 %}
10256
// Matches AddL because the ideal graph canonicalizes "x - 1" into "x + (-1)".
10258 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10259 %{
10260 predicate(!UseAPX && UseIncDec);
10261 match(Set dst (AddL dst src));
10262 effect(KILL cr);
10263
10264 format %{ "decq $dst\t# long" %}
10265 ins_encode %{
10266 __ decrementq($dst$$Register);
10267 %}
10268 ins_pipe(ialu_reg);
10269 %}
10270
10271 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10272 %{
10273 predicate(UseAPX && UseIncDec);
10274 match(Set dst (AddL src val));
10275 effect(KILL cr);
10276
10277 format %{ "edecq $dst, $src\t# long ndd" %}
10278 ins_encode %{
10279 __ edecq($dst$$Register, $src$$Register, false);
10280 %}
10281 ins_pipe(ialu_reg);
10282 %}
10283
10284 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10285 %{
10286 predicate(UseAPX && UseIncDec);
10287 match(Set dst (AddL (LoadL src) val));
10288 effect(KILL cr);
10289
10290 format %{ "edecq $dst, $src\t# long ndd" %}
10291 ins_encode %{
10292 __ edecq($dst$$Register, $src$$Address, false);
10293 %}
10294 ins_pipe(ialu_reg);
10295 %}
10296
10297 // XXX why does that use AddL
10298 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10299 %{
10300 predicate(UseIncDec);
10301 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10302 effect(KILL cr);
10303
10304 ins_cost(125); // XXX
10305 format %{ "decq $dst\t# long" %}
10306 ins_encode %{
10307 __ decrementq($dst$$Address);
10308 %}
10309 ins_pipe(ialu_mem_imm);
10310 %}
10311
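// The leaL rules below fold shift/add chains into a single lea, which
// evaluates base + (index << scale) + disp through the address-generation
// unit and leaves EFLAGS untouched. Rough C sketch of the 3-operand form:
//   long lea3(long base, long index, int scale, long disp) {
//     return base + (index << scale) + disp; // one leaq, no flag writes
//   }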
10312 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10313 %{
10314 predicate(VM_Version::supports_fast_2op_lea());
10315 match(Set dst (AddL (LShiftL index scale) disp));
10316
10317 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10318 ins_encode %{
10319 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10320 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10321 %}
10322 ins_pipe(ialu_reg_reg);
10323 %}
10324
10325 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10326 %{
10327 predicate(VM_Version::supports_fast_3op_lea());
10328 match(Set dst (AddL (AddL base index) disp));
10329
10330 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10331 ins_encode %{
10332 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10333 %}
10334 ins_pipe(ialu_reg_reg);
10335 %}
10336
10337 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10338 %{
10339 predicate(VM_Version::supports_fast_2op_lea());
10340 match(Set dst (AddL base (LShiftL index scale)));
10341
10342 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10343 ins_encode %{
10344 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10345 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10346 %}
10347 ins_pipe(ialu_reg_reg);
10348 %}
10349
10350 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10351 %{
10352 predicate(VM_Version::supports_fast_3op_lea());
10353 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10354
10355 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10356 ins_encode %{
10357 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10358 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10359 %}
10360 ins_pipe(ialu_reg_reg);
10361 %}
10362
10363 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10364 %{
10365 match(Set dst (AddP dst src));
10366 effect(KILL cr);
10367 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10368
10369 format %{ "addq $dst, $src\t# ptr" %}
10370 ins_encode %{
10371 __ addq($dst$$Register, $src$$Register);
10372 %}
10373 ins_pipe(ialu_reg_reg);
10374 %}
10375
10376 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10377 %{
10378 match(Set dst (AddP dst src));
10379 effect(KILL cr);
10380 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10381
10382 format %{ "addq $dst, $src\t# ptr" %}
10383 ins_encode %{
10384 __ addq($dst$$Register, $src$$constant);
10385 %}
10386 ins_pipe( ialu_reg );
10387 %}
10388
10389 // XXX addP mem ops ????
10390
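// The CheckCastPP/Cast* rules below only carry sharpened type information
// for the optimizer; they emit no code (size(0), empty encoding). Only
// when VerifyConstraintCasts > 0 do the checked variants emit a runtime
// range verification.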
10391 instruct checkCastPP(rRegP dst)
10392 %{
10393 match(Set dst (CheckCastPP dst));
10394
10395 size(0);
10396 format %{ "# checkcastPP of $dst" %}
10397 ins_encode(/* empty encoding */);
10398 ins_pipe(empty);
10399 %}
10400
10401 instruct castPP(rRegP dst)
10402 %{
10403 match(Set dst (CastPP dst));
10404
10405 size(0);
10406 format %{ "# castPP of $dst" %}
10407 ins_encode(/* empty encoding */);
10408 ins_pipe(empty);
10409 %}
10410
10411 instruct castII(rRegI dst)
10412 %{
10413 predicate(VerifyConstraintCasts == 0);
10414 match(Set dst (CastII dst));
10415
10416 size(0);
10417 format %{ "# castII of $dst" %}
10418 ins_encode(/* empty encoding */);
10419 ins_cost(0);
10420 ins_pipe(empty);
10421 %}
10422
10423 instruct castII_checked(rRegI dst, rFlagsReg cr)
10424 %{
10425 predicate(VerifyConstraintCasts > 0);
10426 match(Set dst (CastII dst));
10427
10428 effect(KILL cr);
10429 format %{ "# cast_checked_II $dst" %}
10430 ins_encode %{
10431 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10432 %}
10433 ins_pipe(pipe_slow);
10434 %}
10435
10436 instruct castLL(rRegL dst)
10437 %{
10438 predicate(VerifyConstraintCasts == 0);
10439 match(Set dst (CastLL dst));
10440
10441 size(0);
10442 format %{ "# castLL of $dst" %}
10443 ins_encode(/* empty encoding */);
10444 ins_cost(0);
10445 ins_pipe(empty);
10446 %}
10447
10448 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10449 %{
10450 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10451 match(Set dst (CastLL dst));
10452
10453 effect(KILL cr);
10454 format %{ "# cast_checked_LL $dst" %}
10455 ins_encode %{
10456 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10457 %}
10458 ins_pipe(pipe_slow);
10459 %}
10460
10461 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10462 %{
10463 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10464 match(Set dst (CastLL dst));
10465
10466 effect(KILL cr, TEMP tmp);
10467 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10468 ins_encode %{
10469 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10470 %}
10471 ins_pipe(pipe_slow);
10472 %}
10473
10474 instruct castFF(regF dst)
10475 %{
10476 match(Set dst (CastFF dst));
10477
10478 size(0);
10479 format %{ "# castFF of $dst" %}
10480 ins_encode(/* empty encoding */);
10481 ins_cost(0);
10482 ins_pipe(empty);
10483 %}
10484
10485 instruct castHH(regF dst)
10486 %{
10487 match(Set dst (CastHH dst));
10488
10489 size(0);
10490 format %{ "# castHH of $dst" %}
10491 ins_encode(/* empty encoding */);
10492 ins_cost(0);
10493 ins_pipe(empty);
10494 %}
10495
10496 instruct castDD(regD dst)
10497 %{
10498 match(Set dst (CastDD dst));
10499
10500 size(0);
10501 format %{ "# castDD of $dst" %}
10502 ins_encode(/* empty encoding */);
10503 ins_cost(0);
10504 ins_pipe(empty);
10505 %}
10506
10507 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
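// Pseudocode for the lock-prefixed cmpxchg used below (not the exact
// MacroAssembler API): atomically
//   if (*mem_ptr == rax) { *mem_ptr = newval; ZF = 1; }
//   else                 { rax = *mem_ptr;    ZF = 0; }
// setcc(equal, res) then materializes ZF as the 0/1 int result.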
10508 instruct compareAndSwapP(rRegI res,
10509 memory mem_ptr,
10510 rax_RegP oldval, rRegP newval,
10511 rFlagsReg cr)
10512 %{
10513 predicate(n->as_LoadStore()->barrier_data() == 0);
10514 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10515 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10516 effect(KILL cr, KILL oldval);
10517
10518 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10519 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10520 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10521 ins_encode %{
10522 __ lock();
10523 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10524 __ setcc(Assembler::equal, $res$$Register);
10525 %}
10526 ins_pipe( pipe_cmpxchg );
10527 %}
10528
10529 instruct compareAndSwapL(rRegI res,
10530 memory mem_ptr,
10531 rax_RegL oldval, rRegL newval,
10532 rFlagsReg cr)
10533 %{
10534 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10535 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10536 effect(KILL cr, KILL oldval);
10537
10538 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10539 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10540 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10541 ins_encode %{
10542 __ lock();
10543 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10544 __ setcc(Assembler::equal, $res$$Register);
10545 %}
10546 ins_pipe( pipe_cmpxchg );
10547 %}
10548
10549 instruct compareAndSwapI(rRegI res,
10550 memory mem_ptr,
10551 rax_RegI oldval, rRegI newval,
10552 rFlagsReg cr)
10553 %{
10554 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10555 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10556 effect(KILL cr, KILL oldval);
10557
10558 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10559 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10560 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10561 ins_encode %{
10562 __ lock();
10563 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10564 __ setcc(Assembler::equal, $res$$Register);
10565 %}
10566 ins_pipe( pipe_cmpxchg );
10567 %}
10568
10569 instruct compareAndSwapB(rRegI res,
10570 memory mem_ptr,
10571 rax_RegI oldval, rRegI newval,
10572 rFlagsReg cr)
10573 %{
10574 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10575 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10576 effect(KILL cr, KILL oldval);
10577
10578 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10579 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10580 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10581 ins_encode %{
10582 __ lock();
10583 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10584 __ setcc(Assembler::equal, $res$$Register);
10585 %}
10586 ins_pipe( pipe_cmpxchg );
10587 %}
10588
10589 instruct compareAndSwapS(rRegI res,
10590 memory mem_ptr,
10591 rax_RegI oldval, rRegI newval,
10592 rFlagsReg cr)
10593 %{
10594 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10595 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10596 effect(KILL cr, KILL oldval);
10597
10598 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10599 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10600 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10601 ins_encode %{
10602 __ lock();
10603 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10604 __ setcc(Assembler::equal, $res$$Register);
10605 %}
10606 ins_pipe( pipe_cmpxchg );
10607 %}
10608
10609 instruct compareAndSwapN(rRegI res,
10610 memory mem_ptr,
10611 rax_RegN oldval, rRegN newval,
10612 rFlagsReg cr) %{
10613 predicate(n->as_LoadStore()->barrier_data() == 0);
10614 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10615 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10616 effect(KILL cr, KILL oldval);
10617
10618 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10619 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10620 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10621 ins_encode %{
10622 __ lock();
10623 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10624 __ setcc(Assembler::equal, $res$$Register);
10625 %}
10626 ins_pipe( pipe_cmpxchg );
10627 %}
10628
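// CompareAndExchange differs from CompareAndSwap in returning the value
// observed in memory (left in rax by cmpxchg) rather than a success flag,
// so the rules below bind oldval as the result and omit the setcc.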
10629 instruct compareAndExchangeB(
10630 memory mem_ptr,
10631 rax_RegI oldval, rRegI newval,
10632 rFlagsReg cr)
10633 %{
10634 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10635 effect(KILL cr);
10636
10637 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10638 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10639 ins_encode %{
10640 __ lock();
10641 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10642 %}
10643 ins_pipe( pipe_cmpxchg );
10644 %}
10645
10646 instruct compareAndExchangeS(
10647 memory mem_ptr,
10648 rax_RegI oldval, rRegI newval,
10649 rFlagsReg cr)
10650 %{
10651 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10652 effect(KILL cr);
10653
10654 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10655 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10656 ins_encode %{
10657 __ lock();
10658 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10659 %}
10660 ins_pipe( pipe_cmpxchg );
10661 %}
10662
10663 instruct compareAndExchangeI(
10664 memory mem_ptr,
10665 rax_RegI oldval, rRegI newval,
10666 rFlagsReg cr)
10667 %{
10668 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10669 effect(KILL cr);
10670
10671 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10672 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10673 ins_encode %{
10674 __ lock();
10675 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10676 %}
10677 ins_pipe( pipe_cmpxchg );
10678 %}
10679
10680 instruct compareAndExchangeL(
10681 memory mem_ptr,
10682 rax_RegL oldval, rRegL newval,
10683 rFlagsReg cr)
10684 %{
10685 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10686 effect(KILL cr);
10687
10688 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10689 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10690 ins_encode %{
10691 __ lock();
10692 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10693 %}
10694 ins_pipe( pipe_cmpxchg );
10695 %}
10696
10697 instruct compareAndExchangeN(
10698 memory mem_ptr,
10699 rax_RegN oldval, rRegN newval,
10700 rFlagsReg cr) %{
10701 predicate(n->as_LoadStore()->barrier_data() == 0);
10702 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10703 effect(KILL cr);
10704
10705 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10706 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10707 ins_encode %{
10708 __ lock();
10709 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10710 %}
10711 ins_pipe( pipe_cmpxchg );
10712 %}
10713
10714 instruct compareAndExchangeP(
10715 memory mem_ptr,
10716 rax_RegP oldval, rRegP newval,
10717 rFlagsReg cr)
10718 %{
10719 predicate(n->as_LoadStore()->barrier_data() == 0);
10720 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10721 effect(KILL cr);
10722
10723 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10724 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10725 ins_encode %{
10726 __ lock();
10727 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10728 %}
10729 ins_pipe( pipe_cmpxchg );
10730 %}
10731
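// GetAndAdd lowering: when the old value is unused, a plain locked add is
// sufficient; otherwise lock xadd returns it. Pseudocode for xadd:
//   atomically { old = *mem; *mem = old + newval; newval = old; }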
10732 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10733 predicate(n->as_LoadStore()->result_not_used());
10734 match(Set dummy (GetAndAddB mem add));
10735 effect(KILL cr);
10736 format %{ "addb_lock $mem, $add" %}
10737 ins_encode %{
10738 __ lock();
10739 __ addb($mem$$Address, $add$$Register);
10740 %}
10741 ins_pipe(pipe_cmpxchg);
10742 %}
10743
10744 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10745 predicate(n->as_LoadStore()->result_not_used());
10746 match(Set dummy (GetAndAddB mem add));
10747 effect(KILL cr);
10748 format %{ "addb_lock $mem, $add" %}
10749 ins_encode %{
10750 __ lock();
10751 __ addb($mem$$Address, $add$$constant);
10752 %}
10753 ins_pipe(pipe_cmpxchg);
10754 %}
10755
10756 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10757 predicate(!n->as_LoadStore()->result_not_used());
10758 match(Set newval (GetAndAddB mem newval));
10759 effect(KILL cr);
10760 format %{ "xaddb_lock $mem, $newval" %}
10761 ins_encode %{
10762 __ lock();
10763 __ xaddb($mem$$Address, $newval$$Register);
10764 %}
10765 ins_pipe(pipe_cmpxchg);
10766 %}
10767
10768 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10769 predicate(n->as_LoadStore()->result_not_used());
10770 match(Set dummy (GetAndAddS mem add));
10771 effect(KILL cr);
10772 format %{ "addw_lock $mem, $add" %}
10773 ins_encode %{
10774 __ lock();
10775 __ addw($mem$$Address, $add$$Register);
10776 %}
10777 ins_pipe(pipe_cmpxchg);
10778 %}
10779
10780 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10781 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10782 match(Set dummy (GetAndAddS mem add));
10783 effect(KILL cr);
10784 format %{ "addw_lock $mem, $add" %}
10785 ins_encode %{
10786 __ lock();
10787 __ addw($mem$$Address, $add$$constant);
10788 %}
10789 ins_pipe(pipe_cmpxchg);
10790 %}
10791
10792 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10793 predicate(!n->as_LoadStore()->result_not_used());
10794 match(Set newval (GetAndAddS mem newval));
10795 effect(KILL cr);
10796 format %{ "xaddw_lock $mem, $newval" %}
10797 ins_encode %{
10798 __ lock();
10799 __ xaddw($mem$$Address, $newval$$Register);
10800 %}
10801 ins_pipe(pipe_cmpxchg);
10802 %}
10803
10804 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10805 predicate(n->as_LoadStore()->result_not_used());
10806 match(Set dummy (GetAndAddI mem add));
10807 effect(KILL cr);
10808 format %{ "addl_lock $mem, $add" %}
10809 ins_encode %{
10810 __ lock();
10811 __ addl($mem$$Address, $add$$Register);
10812 %}
10813 ins_pipe(pipe_cmpxchg);
10814 %}
10815
10816 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10817 predicate(n->as_LoadStore()->result_not_used());
10818 match(Set dummy (GetAndAddI mem add));
10819 effect(KILL cr);
10820 format %{ "addl_lock $mem, $add" %}
10821 ins_encode %{
10822 __ lock();
10823 __ addl($mem$$Address, $add$$constant);
10824 %}
10825 ins_pipe(pipe_cmpxchg);
10826 %}
10827
10828 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10829 predicate(!n->as_LoadStore()->result_not_used());
10830 match(Set newval (GetAndAddI mem newval));
10831 effect(KILL cr);
10832 format %{ "xaddl_lock $mem, $newval" %}
10833 ins_encode %{
10834 __ lock();
10835 __ xaddl($mem$$Address, $newval$$Register);
10836 %}
10837 ins_pipe(pipe_cmpxchg);
10838 %}
10839
10840 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10841 predicate(n->as_LoadStore()->result_not_used());
10842 match(Set dummy (GetAndAddL mem add));
10843 effect(KILL cr);
10844 format %{ "addq_lock $mem, $add" %}
10845 ins_encode %{
10846 __ lock();
10847 __ addq($mem$$Address, $add$$Register);
10848 %}
10849 ins_pipe(pipe_cmpxchg);
10850 %}
10851
10852 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10853 predicate(n->as_LoadStore()->result_not_used());
10854 match(Set dummy (GetAndAddL mem add));
10855 effect(KILL cr);
10856 format %{ "addq_lock $mem, $add" %}
10857 ins_encode %{
10858 __ lock();
10859 __ addq($mem$$Address, $add$$constant);
10860 %}
10861 ins_pipe(pipe_cmpxchg);
10862 %}
10863
10864 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10865 predicate(!n->as_LoadStore()->result_not_used());
10866 match(Set newval (GetAndAddL mem newval));
10867 effect(KILL cr);
10868 format %{ "xaddq_lock $mem, $newval" %}
10869 ins_encode %{
10870 __ lock();
10871 __ xaddq($mem$$Address, $newval$$Register);
10872 %}
10873 ins_pipe(pipe_cmpxchg);
10874 %}
10875
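// GetAndSet lowering: xchg with a memory operand is implicitly locked on
// x86, so no lock() prefix is emitted, and since xchg writes no flags the
// rules below need no rFlagsReg effect.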
10876 instruct xchgB( memory mem, rRegI newval) %{
10877 match(Set newval (GetAndSetB mem newval));
10878 format %{ "XCHGB $newval,[$mem]" %}
10879 ins_encode %{
10880 __ xchgb($newval$$Register, $mem$$Address);
10881 %}
10882 ins_pipe( pipe_cmpxchg );
10883 %}
10884
10885 instruct xchgS( memory mem, rRegI newval) %{
10886 match(Set newval (GetAndSetS mem newval));
10887 format %{ "XCHGW $newval,[$mem]" %}
10888 ins_encode %{
10889 __ xchgw($newval$$Register, $mem$$Address);
10890 %}
10891 ins_pipe( pipe_cmpxchg );
10892 %}
10893
10894 instruct xchgI( memory mem, rRegI newval) %{
10895 match(Set newval (GetAndSetI mem newval));
10896 format %{ "XCHGL $newval,[$mem]" %}
10897 ins_encode %{
10898 __ xchgl($newval$$Register, $mem$$Address);
10899 %}
10900 ins_pipe( pipe_cmpxchg );
10901 %}
10902
10903 instruct xchgL( memory mem, rRegL newval) %{
10904 match(Set newval (GetAndSetL mem newval));
10905 format %{ "XCHGL $newval,[$mem]" %}
10906 ins_encode %{
10907 __ xchgq($newval$$Register, $mem$$Address);
10908 %}
10909 ins_pipe( pipe_cmpxchg );
10910 %}
10911
10912 instruct xchgP( memory mem, rRegP newval) %{
10913 match(Set newval (GetAndSetP mem newval));
10914 predicate(n->as_LoadStore()->barrier_data() == 0);
10915 format %{ "XCHGQ $newval,[$mem]" %}
10916 ins_encode %{
10917 __ xchgq($newval$$Register, $mem$$Address);
10918 %}
10919 ins_pipe( pipe_cmpxchg );
10920 %}
10921
10922 instruct xchgN( memory mem, rRegN newval) %{
10923 predicate(n->as_LoadStore()->barrier_data() == 0);
10924 match(Set newval (GetAndSetN mem newval));
10925 format %{ "XCHGL $newval,$mem]" %}
10926 ins_encode %{
10927 __ xchgl($newval$$Register, $mem$$Address);
10928 %}
10929 ins_pipe( pipe_cmpxchg );
10930 %}
10931
10932 //----------Abs Instructions-------------------------------------------
10933
10934 // Integer Absolute Instructions
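// Branchless abs: compute 0 - src, then cmov the original back when the
// negation came out negative (i.e. src was positive). C sketch:
//   int abs(int x) { int d = -x; return (d < 0) ? x : d; }
// Integer.MIN_VALUE maps to itself, matching Java's Math.abs.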
10935 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10936 %{
10937 match(Set dst (AbsI src));
10938 effect(TEMP dst, KILL cr);
10939 format %{ "xorl $dst, $dst\t# abs int\n\t"
10940 "subl $dst, $src\n\t"
10941 "cmovll $dst, $src" %}
10942 ins_encode %{
10943 __ xorl($dst$$Register, $dst$$Register);
10944 __ subl($dst$$Register, $src$$Register);
10945 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10946 %}
10947
10948 ins_pipe(ialu_reg_reg);
10949 %}
10950
10951 // Long Absolute Instructions
10952 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10953 %{
10954 match(Set dst (AbsL src));
10955 effect(TEMP dst, KILL cr);
10956 format %{ "xorl $dst, $dst\t# abs long\n\t"
10957 "subq $dst, $src\n\t"
10958 "cmovlq $dst, $src" %}
10959 ins_encode %{
10960 __ xorl($dst$$Register, $dst$$Register);
10961 __ subq($dst$$Register, $src$$Register);
10962 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10963 %}
10964
10965 ins_pipe(ialu_reg_reg);
10966 %}
10967
10968 //----------Subtraction Instructions-------------------------------------------
10969
10970 // Integer Subtraction Instructions
10971 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10972 %{
10973 predicate(!UseAPX);
10974 match(Set dst (SubI dst src));
10975 effect(KILL cr);
10976 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10977
10978 format %{ "subl $dst, $src\t# int" %}
10979 ins_encode %{
10980 __ subl($dst$$Register, $src$$Register);
10981 %}
10982 ins_pipe(ialu_reg_reg);
10983 %}
10984
10985 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10986 %{
10987 predicate(UseAPX);
10988 match(Set dst (SubI src1 src2));
10989 effect(KILL cr);
10990 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10991
10992 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
10993 ins_encode %{
10994 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10995 %}
10996 ins_pipe(ialu_reg_reg);
10997 %}
10998
10999 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11000 %{
11001 predicate(UseAPX);
11002 match(Set dst (SubI src1 src2));
11003 effect(KILL cr);
11004 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11005
11006 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11007 ins_encode %{
11008 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11009 %}
11010 ins_pipe(ialu_reg_reg);
11011 %}
11012
11013 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11014 %{
11015 predicate(UseAPX);
11016 match(Set dst (SubI (LoadI src1) src2));
11017 effect(KILL cr);
11018 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11019
11020 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11021 ins_encode %{
11022 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11023 %}
11024 ins_pipe(ialu_reg_reg);
11025 %}
11026
11027 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11028 %{
11029 predicate(!UseAPX);
11030 match(Set dst (SubI dst (LoadI src)));
11031 effect(KILL cr);
11032 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11033
11034 ins_cost(150);
11035 format %{ "subl $dst, $src\t# int" %}
11036 ins_encode %{
11037 __ subl($dst$$Register, $src$$Address);
11038 %}
11039 ins_pipe(ialu_reg_mem);
11040 %}
11041
11042 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11043 %{
11044 predicate(UseAPX);
11045 match(Set dst (SubI src1 (LoadI src2)));
11046 effect(KILL cr);
11047 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11048
11049 ins_cost(150);
11050 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11051 ins_encode %{
11052 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11053 %}
11054 ins_pipe(ialu_reg_mem);
11055 %}
11056
11057 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11058 %{
11059 predicate(UseAPX);
11060 match(Set dst (SubI (LoadI src1) src2));
11061 effect(KILL cr);
11062 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11063
11064 ins_cost(150);
11065 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11066 ins_encode %{
11067 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11068 %}
11069 ins_pipe(ialu_reg_mem);
11070 %}
11071
11072 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11073 %{
11074 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11075 effect(KILL cr);
11076 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11077
11078 ins_cost(150);
11079 format %{ "subl $dst, $src\t# int" %}
11080 ins_encode %{
11081 __ subl($dst$$Address, $src$$Register);
11082 %}
11083 ins_pipe(ialu_mem_reg);
11084 %}
11085
11086 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11087 %{
11088 predicate(!UseAPX);
11089 match(Set dst (SubL dst src));
11090 effect(KILL cr);
11091 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092
11093 format %{ "subq $dst, $src\t# long" %}
11094 ins_encode %{
11095 __ subq($dst$$Register, $src$$Register);
11096 %}
11097 ins_pipe(ialu_reg_reg);
11098 %}
11099
11100 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11101 %{
11102 predicate(UseAPX);
11103 match(Set dst (SubL src1 src2));
11104 effect(KILL cr);
11105 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11106
11107 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11108 ins_encode %{
11109 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11110 %}
11111 ins_pipe(ialu_reg_reg);
11112 %}
11113
11114 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11115 %{
11116 predicate(UseAPX);
11117 match(Set dst (SubL src1 src2));
11118 effect(KILL cr);
11119 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11120
11121 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11122 ins_encode %{
11123 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11124 %}
11125 ins_pipe(ialu_reg_reg);
11126 %}
11127
11128 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11129 %{
11130 predicate(UseAPX);
11131 match(Set dst (SubL (LoadL src1) src2));
11132 effect(KILL cr);
11133 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11134
11135 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11136 ins_encode %{
11137 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11138 %}
11139 ins_pipe(ialu_reg_reg);
11140 %}
11141
11142 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11143 %{
11144 predicate(!UseAPX);
11145 match(Set dst (SubL dst (LoadL src)));
11146 effect(KILL cr);
11147 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11148
11149 ins_cost(150);
11150 format %{ "subq $dst, $src\t# long" %}
11151 ins_encode %{
11152 __ subq($dst$$Register, $src$$Address);
11153 %}
11154 ins_pipe(ialu_reg_mem);
11155 %}
11156
11157 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11158 %{
11159 predicate(UseAPX);
11160 match(Set dst (SubL src1 (LoadL src2)));
11161 effect(KILL cr);
11162 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11163
11164 ins_cost(150);
11165 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11166 ins_encode %{
11167 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11168 %}
11169 ins_pipe(ialu_reg_mem);
11170 %}
11171
11172 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11173 %{
11174 predicate(UseAPX);
11175 match(Set dst (SubL (LoadL src1) src2));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178
11179 ins_cost(150);
11180 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11181 ins_encode %{
11182 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11183 %}
11184 ins_pipe(ialu_reg_mem);
11185 %}
11186
11187 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11188 %{
11189 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192
11193 ins_cost(150);
11194 format %{ "subq $dst, $src\t# long" %}
11195 ins_encode %{
11196 __ subq($dst$$Address, $src$$Register);
11197 %}
11198 ins_pipe(ialu_mem_reg);
11199 %}
11200
11201 // Subtract from a pointer
11202 // XXX hmpf???
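// A pointer minus an int reaches the matcher as AddP(dst, SubI(0, src));
// the rule below folds the negate-and-add into a single subq.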
11203 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11204 %{
11205 match(Set dst (AddP dst (SubI zero src)));
11206 effect(KILL cr);
11207
11208 format %{ "subq $dst, $src\t# ptr - int" %}
11209 ins_encode %{
11210 __ subq($dst$$Register, $src$$Register);
11211 %}
11212 ins_pipe(ialu_reg_reg);
11213 %}
11214
11215 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11216 %{
11217 predicate(!UseAPX);
11218 match(Set dst (SubI zero dst));
11219 effect(KILL cr);
11220 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11221
11222 format %{ "negl $dst\t# int" %}
11223 ins_encode %{
11224 __ negl($dst$$Register);
11225 %}
11226 ins_pipe(ialu_reg);
11227 %}
11228
11229 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11230 %{
11231 predicate(UseAPX);
11232 match(Set dst (SubI zero src));
11233 effect(KILL cr);
11234 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11235
11236 format %{ "enegl $dst, $src\t# int ndd" %}
11237 ins_encode %{
11238 __ enegl($dst$$Register, $src$$Register, false);
11239 %}
11240 ins_pipe(ialu_reg);
11241 %}
11242
11243 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11244 %{
11245 predicate(!UseAPX);
11246 match(Set dst (NegI dst));
11247 effect(KILL cr);
11248 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11249
11250 format %{ "negl $dst\t# int" %}
11251 ins_encode %{
11252 __ negl($dst$$Register);
11253 %}
11254 ins_pipe(ialu_reg);
11255 %}
11256
11257 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11258 %{
11259 predicate(UseAPX);
11260 match(Set dst (NegI src));
11261 effect(KILL cr);
11262 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11263
11264 format %{ "enegl $dst, $src\t# int ndd" %}
11265 ins_encode %{
11266 __ enegl($dst$$Register, $src$$Register, false);
11267 %}
11268 ins_pipe(ialu_reg);
11269 %}
11270
11271 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11272 %{
11273 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11274 effect(KILL cr);
11275 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11276
11277 format %{ "negl $dst\t# int" %}
11278 ins_encode %{
11279 __ negl($dst$$Address);
11280 %}
11281 ins_pipe(ialu_reg);
11282 %}
11283
11284 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11285 %{
11286 predicate(!UseAPX);
11287 match(Set dst (SubL zero dst));
11288 effect(KILL cr);
11289 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11290
11291 format %{ "negq $dst\t# long" %}
11292 ins_encode %{
11293 __ negq($dst$$Register);
11294 %}
11295 ins_pipe(ialu_reg);
11296 %}
11297
11298 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11299 %{
11300 predicate(UseAPX);
11301 match(Set dst (SubL zero src));
11302 effect(KILL cr);
11303 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11304
11305 format %{ "enegq $dst, $src\t# long ndd" %}
11306 ins_encode %{
11307 __ enegq($dst$$Register, $src$$Register, false);
11308 %}
11309 ins_pipe(ialu_reg);
11310 %}
11311
11312 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11313 %{
11314 predicate(!UseAPX);
11315 match(Set dst (NegL dst));
11316 effect(KILL cr);
11317 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11318
11319 format %{ "negq $dst\t# int" %}
11320 ins_encode %{
11321 __ negq($dst$$Register);
11322 %}
11323 ins_pipe(ialu_reg);
11324 %}
11325
11326 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11327 %{
11328 predicate(UseAPX);
11329 match(Set dst (NegL src));
11330 effect(KILL cr);
11331 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11332
11333 format %{ "enegq $dst, $src\t# long ndd" %}
11334 ins_encode %{
11335 __ enegq($dst$$Register, $src$$Register, false);
11336 %}
11337 ins_pipe(ialu_reg);
11338 %}
11339
11340 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11341 %{
11342 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11343 effect(KILL cr);
11344 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11345
11346 format %{ "negq $dst\t# long" %}
11347 ins_encode %{
11348 __ negq($dst$$Address);
11349 %}
11350 ins_pipe(ialu_reg);
11351 %}
11352
11353 //----------Multiplication/Division Instructions-------------------------------
11354 // Integer Multiplication Instructions
11355 // Multiply Register
11356
11357 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11358 %{
11359 predicate(!UseAPX);
11360 match(Set dst (MulI dst src));
11361 effect(KILL cr);
11362
11363 ins_cost(300);
11364 format %{ "imull $dst, $src\t# int" %}
11365 ins_encode %{
11366 __ imull($dst$$Register, $src$$Register);
11367 %}
11368 ins_pipe(ialu_reg_reg_alu0);
11369 %}
11370
11371 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11372 %{
11373 predicate(UseAPX);
11374 match(Set dst (MulI src1 src2));
11375 effect(KILL cr);
11376
11377 ins_cost(300);
11378 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11379 ins_encode %{
11380 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11381 %}
11382 ins_pipe(ialu_reg_reg_alu0);
11383 %}
11384
11385 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11386 %{
11387 match(Set dst (MulI src imm));
11388 effect(KILL cr);
11389
11390 ins_cost(300);
11391 format %{ "imull $dst, $src, $imm\t# int" %}
11392 ins_encode %{
11393 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11394 %}
11395 ins_pipe(ialu_reg_reg_alu0);
11396 %}
11397
11398 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11399 %{
11400 predicate(!UseAPX);
11401 match(Set dst (MulI dst (LoadI src)));
11402 effect(KILL cr);
11403
11404 ins_cost(350);
11405 format %{ "imull $dst, $src\t# int" %}
11406 ins_encode %{
11407 __ imull($dst$$Register, $src$$Address);
11408 %}
11409 ins_pipe(ialu_reg_mem_alu0);
11410 %}
11411
11412 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11413 %{
11414 predicate(UseAPX);
11415 match(Set dst (MulI src1 (LoadI src2)));
11416 effect(KILL cr);
11417
11418 ins_cost(350);
11419 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11420 ins_encode %{
11421 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11422 %}
11423 ins_pipe(ialu_reg_mem_alu0);
11424 %}
11425
11426 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11427 %{
11428 match(Set dst (MulI (LoadI src) imm));
11429 effect(KILL cr);
11430
11431 ins_cost(300);
11432 format %{ "imull $dst, $src, $imm\t# int" %}
11433 ins_encode %{
11434 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11435 %}
11436 ins_pipe(ialu_reg_mem_alu0);
11437 %}
11438
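// MulAddS2I computes dst*src1 + src2*src3; with no fused rule available it
// is expanded into two imulls and an addl, clobbering src2 as scratch.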
11439 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11440 %{
11441 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11442 effect(KILL cr, KILL src2);
11443
11444 expand %{ mulI_rReg(dst, src1, cr);
11445 mulI_rReg(src2, src3, cr);
11446 addI_rReg(dst, src2, cr); %}
11447 %}
11448
11449 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11450 %{
11451 predicate(!UseAPX);
11452 match(Set dst (MulL dst src));
11453 effect(KILL cr);
11454
11455 ins_cost(300);
11456 format %{ "imulq $dst, $src\t# long" %}
11457 ins_encode %{
11458 __ imulq($dst$$Register, $src$$Register);
11459 %}
11460 ins_pipe(ialu_reg_reg_alu0);
11461 %}
11462
11463 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11464 %{
11465 predicate(UseAPX);
11466 match(Set dst (MulL src1 src2));
11467 effect(KILL cr);
11468
11469 ins_cost(300);
11470 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11471 ins_encode %{
11472 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11473 %}
11474 ins_pipe(ialu_reg_reg_alu0);
11475 %}
11476
11477 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11478 %{
11479 match(Set dst (MulL src imm));
11480 effect(KILL cr);
11481
11482 ins_cost(300);
11483 format %{ "imulq $dst, $src, $imm\t# long" %}
11484 ins_encode %{
11485 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11486 %}
11487 ins_pipe(ialu_reg_reg_alu0);
11488 %}
11489
11490 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11491 %{
11492 predicate(!UseAPX);
11493 match(Set dst (MulL dst (LoadL src)));
11494 effect(KILL cr);
11495
11496 ins_cost(350);
11497 format %{ "imulq $dst, $src\t# long" %}
11498 ins_encode %{
11499 __ imulq($dst$$Register, $src$$Address);
11500 %}
11501 ins_pipe(ialu_reg_mem_alu0);
11502 %}
11503
11504 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11505 %{
11506 predicate(UseAPX);
11507 match(Set dst (MulL src1 (LoadL src2)));
11508 effect(KILL cr);
11509
11510 ins_cost(350);
11511 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11512 ins_encode %{
11513 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11514 %}
11515 ins_pipe(ialu_reg_mem_alu0);
11516 %}
11517
11518 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11519 %{
11520 match(Set dst (MulL (LoadL src) imm));
11521 effect(KILL cr);
11522
11523 ins_cost(300);
11524 format %{ "imulq $dst, $src, $imm\t# long" %}
11525 ins_encode %{
11526 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11527 %}
11528 ins_pipe(ialu_reg_mem_alu0);
11529 %}
11530
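// MulHiL/UMulHiL produce the high 64 bits of a 128-bit product. The
// one-operand imulq/mulq forms leave the full product in RDX:RAX, so dst
// is pinned to RDX and RAX is consumed; in effect
//   dst = (__int128)src * rax >> 64;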
11531 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11532 %{
11533 match(Set dst (MulHiL src rax));
11534 effect(USE_KILL rax, KILL cr);
11535
11536 ins_cost(300);
11537 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11538 ins_encode %{
11539 __ imulq($src$$Register);
11540 %}
11541 ins_pipe(ialu_reg_reg_alu0);
11542 %}
11543
11544 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11545 %{
11546 match(Set dst (UMulHiL src rax));
11547 effect(USE_KILL rax, KILL cr);
11548
11549 ins_cost(300);
11550 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11551 ins_encode %{
11552 __ mulq($src$$Register);
11553 %}
11554 ins_pipe(ialu_reg_reg_alu0);
11555 %}
11556
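// Signed division must special-case MIN_VALUE / -1: hardware idiv raises
// #DE on that overflow, while Java defines the result. The cdql/cdqq
// encodings below implement, in effect:
//   if (rax == MIN_VALUE && div == -1) { rax = MIN_VALUE; rdx = 0; }
//   else { /* sign-extend rax into rdx, then idiv div */ }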
11557 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11558 rFlagsReg cr)
11559 %{
11560 match(Set rax (DivI rax div));
11561 effect(KILL rdx, KILL cr);
11562
11563 ins_cost(30*100+10*100); // XXX
11564 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11565 "jne,s normal\n\t"
11566 "xorl rdx, rdx\n\t"
11567 "cmpl $div, -1\n\t"
11568 "je,s done\n"
11569 "normal: cdql\n\t"
11570 "idivl $div\n"
11571 "done:" %}
11572 ins_encode(cdql_enc(div));
11573 ins_pipe(ialu_reg_reg_alu0);
11574 %}
11575
11576 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11577 rFlagsReg cr)
11578 %{
11579 match(Set rax (DivL rax div));
11580 effect(KILL rdx, KILL cr);
11581
11582 ins_cost(30*100+10*100); // XXX
11583 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11584 "cmpq rax, rdx\n\t"
11585 "jne,s normal\n\t"
11586 "xorl rdx, rdx\n\t"
11587 "cmpq $div, -1\n\t"
11588 "je,s done\n"
11589 "normal: cdqq\n\t"
11590 "idivq $div\n"
11591 "done:" %}
11592 ins_encode(cdqq_enc(div));
11593 ins_pipe(ialu_reg_reg_alu0);
11594 %}
11595
11596 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11597 %{
11598 match(Set rax (UDivI rax div));
11599 effect(KILL rdx, KILL cr);
11600
11601 ins_cost(300);
11602 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11603 ins_encode %{
11604 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11605 %}
11606 ins_pipe(ialu_reg_reg_alu0);
11607 %}
11608
11609 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11610 %{
11611 match(Set rax (UDivL rax div));
11612 effect(KILL rdx, KILL cr);
11613
11614 ins_cost(300);
11615 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11616 ins_encode %{
11617 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11618 %}
11619 ins_pipe(ialu_reg_reg_alu0);
11620 %}
11621
11622 // Integer DIVMOD with Register, both quotient and mod results
11623 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11624 rFlagsReg cr)
11625 %{
11626 match(DivModI rax div);
11627 effect(KILL cr);
11628
11629 ins_cost(30*100+10*100); // XXX
11630 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11631 "jne,s normal\n\t"
11632 "xorl rdx, rdx\n\t"
11633 "cmpl $div, -1\n\t"
11634 "je,s done\n"
11635 "normal: cdql\n\t"
11636 "idivl $div\n"
11637 "done:" %}
11638 ins_encode(cdql_enc(div));
11639 ins_pipe(pipe_slow);
11640 %}
11641
11642 // Long DIVMOD with Register, both quotient and mod results
11643 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11644 rFlagsReg cr)
11645 %{
11646 match(DivModL rax div);
11647 effect(KILL cr);
11648
11649 ins_cost(30*100+10*100); // XXX
11650 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11651 "cmpq rax, rdx\n\t"
11652 "jne,s normal\n\t"
11653 "xorl rdx, rdx\n\t"
11654 "cmpq $div, -1\n\t"
11655 "je,s done\n"
11656 "normal: cdqq\n\t"
11657 "idivq $div\n"
11658 "done:" %}
11659 ins_encode(cdqq_enc(div));
11660 ins_pipe(pipe_slow);
11661 %}
11662
11663 // Unsigned integer DIVMOD with Register, both quotient and mod results
11664 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11665 no_rax_rdx_RegI div, rFlagsReg cr)
11666 %{
11667 match(UDivModI rax div);
11668 effect(TEMP tmp, KILL cr);
11669
11670 ins_cost(300);
11671 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11672 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11673 %}
11674 ins_encode %{
11675 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11676 %}
11677 ins_pipe(pipe_slow);
11678 %}
11679
11680 // Unsigned long DIVMOD with Register, both quotient and mod results
11681 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11682 no_rax_rdx_RegL div, rFlagsReg cr)
11683 %{
11684 match(UDivModL rax div);
11685 effect(TEMP tmp, KILL cr);
11686
11687 ins_cost(300);
11688 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11689 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11690 %}
11691 ins_encode %{
11692 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11693 %}
11694 ins_pipe(pipe_slow);
11695 %}
11696
11697 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11698 rFlagsReg cr)
11699 %{
11700 match(Set rdx (ModI rax div));
11701 effect(KILL rax, KILL cr);
11702
11703 ins_cost(300); // XXX
11704 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11705 "jne,s normal\n\t"
11706 "xorl rdx, rdx\n\t"
11707 "cmpl $div, -1\n\t"
11708 "je,s done\n"
11709 "normal: cdql\n\t"
11710 "idivl $div\n"
11711 "done:" %}
11712 ins_encode(cdql_enc(div));
11713 ins_pipe(ialu_reg_reg_alu0);
11714 %}
11715
11716 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11717 rFlagsReg cr)
11718 %{
11719 match(Set rdx (ModL rax div));
11720 effect(KILL rax, KILL cr);
11721
11722 ins_cost(300); // XXX
11723 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11724 "cmpq rax, rdx\n\t"
11725 "jne,s normal\n\t"
11726 "xorl rdx, rdx\n\t"
11727 "cmpq $div, -1\n\t"
11728 "je,s done\n"
11729 "normal: cdqq\n\t"
11730 "idivq $div\n"
11731 "done:" %}
11732 ins_encode(cdqq_enc(div));
11733 ins_pipe(ialu_reg_reg_alu0);
11734 %}
11735
11736 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11737 %{
11738 match(Set rdx (UModI rax div));
11739 effect(KILL rax, KILL cr);
11740
11741 ins_cost(300);
11742 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11743 ins_encode %{
11744 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11745 %}
11746 ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748
11749 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11750 %{
11751 match(Set rdx (UModL rax div));
11752 effect(KILL rax, KILL cr);
11753
11754 ins_cost(300);
11755 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11756 ins_encode %{
11757 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11758 %}
11759 ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761
11762 // Integer Shift Instructions
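// x86 masks variable shift counts to 5 bits for 32-bit forms and to 6 bits
// for 64-bit forms, which matches Java's << / >> / >>> semantics, so no
// explicit (count & 31) or (count & 63) masking needs to be emitted.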
11763 // Shift Left by one, two, three
11764 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11765 %{
11766 predicate(!UseAPX);
11767 match(Set dst (LShiftI dst shift));
11768 effect(KILL cr);
11769
11770 format %{ "sall $dst, $shift" %}
11771 ins_encode %{
11772 __ sall($dst$$Register, $shift$$constant);
11773 %}
11774 ins_pipe(ialu_reg);
11775 %}
11776
11777 // Shift Left by one, two, three
11778 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11779 %{
11780 predicate(UseAPX);
11781 match(Set dst (LShiftI src shift));
11782 effect(KILL cr);
11783
11784 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11785 ins_encode %{
11786 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11787 %}
11788 ins_pipe(ialu_reg);
11789 %}
11790
11791 // Shift Left by 8-bit immediate
11792 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11793 %{
11794 predicate(!UseAPX);
11795 match(Set dst (LShiftI dst shift));
11796 effect(KILL cr);
11797
11798 format %{ "sall $dst, $shift" %}
11799 ins_encode %{
11800 __ sall($dst$$Register, $shift$$constant);
11801 %}
11802 ins_pipe(ialu_reg);
11803 %}
11804
11805 // Shift Left by 8-bit immediate
11806 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11807 %{
11808 predicate(UseAPX);
11809 match(Set dst (LShiftI src shift));
11810 effect(KILL cr);
11811
11812 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11813 ins_encode %{
11814 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11815 %}
11816 ins_pipe(ialu_reg);
11817 %}
11818
11819 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11820 %{
11821 predicate(UseAPX);
11822 match(Set dst (LShiftI (LoadI src) shift));
11823 effect(KILL cr);
11824
11825 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11826 ins_encode %{
11827 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11828 %}
11829 ins_pipe(ialu_reg);
11830 %}
11831
11832 // Shift Left by 8-bit immediate
11833 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11834 %{
11835 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11836 effect(KILL cr);
11837
11838 format %{ "sall $dst, $shift" %}
11839 ins_encode %{
11840 __ sall($dst$$Address, $shift$$constant);
11841 %}
11842 ins_pipe(ialu_mem_imm);
11843 %}
11844
11845 // Shift Left by variable
11846 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11847 %{
11848 predicate(!VM_Version::supports_bmi2());
11849 match(Set dst (LShiftI dst shift));
11850 effect(KILL cr);
11851
11852 format %{ "sall $dst, $shift" %}
11853 ins_encode %{
11854 __ sall($dst$$Register);
11855 %}
11856 ins_pipe(ialu_reg_reg);
11857 %}
11858
11859 // Shift Left by variable
11860 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11861 %{
11862 predicate(!VM_Version::supports_bmi2());
11863 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11864 effect(KILL cr);
11865
11866 format %{ "sall $dst, $shift" %}
11867 ins_encode %{
11868 __ sall($dst$$Address);
11869 %}
11870 ins_pipe(ialu_mem_reg);
11871 %}
11872
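// BMI2 shlx/shrx/sarx are three-operand and flag-neutral: the destination
// may differ from the source and the count may live in any register rather
// than CL, so no rFlagsReg is killed and the allocator gains freedom.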
11873 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11874 %{
11875 predicate(VM_Version::supports_bmi2());
11876 match(Set dst (LShiftI src shift));
11877
11878 format %{ "shlxl $dst, $src, $shift" %}
11879 ins_encode %{
11880 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11881 %}
11882 ins_pipe(ialu_reg_reg);
11883 %}
11884
11885 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11886 %{
11887 predicate(VM_Version::supports_bmi2());
11888 match(Set dst (LShiftI (LoadI src) shift));
11889 ins_cost(175);
11890 format %{ "shlxl $dst, $src, $shift" %}
11891 ins_encode %{
11892 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11893 %}
11894 ins_pipe(ialu_reg_mem);
11895 %}
11896
11897 // Arithmetic Shift Right by 8-bit immediate
11898 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11899 %{
11900 predicate(!UseAPX);
11901 match(Set dst (RShiftI dst shift));
11902 effect(KILL cr);
11903
11904 format %{ "sarl $dst, $shift" %}
11905 ins_encode %{
11906 __ sarl($dst$$Register, $shift$$constant);
11907 %}
11908 ins_pipe(ialu_mem_imm);
11909 %}
11910
11911 // Arithmetic Shift Right by 8-bit immediate
11912 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11913 %{
11914 predicate(UseAPX);
11915 match(Set dst (RShiftI src shift));
11916 effect(KILL cr);
11917
11918 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11919 ins_encode %{
11920 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11921 %}
11922 ins_pipe(ialu_mem_imm);
11923 %}
11924
11925 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11926 %{
11927 predicate(UseAPX);
11928 match(Set dst (RShiftI (LoadI src) shift));
11929 effect(KILL cr);
11930
11931 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11932 ins_encode %{
11933 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11934 %}
11935 ins_pipe(ialu_mem_imm);
11936 %}
11937
11938 // Arithmetic Shift Right by 8-bit immediate
11939 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11940 %{
11941 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11942 effect(KILL cr);
11943
11944 format %{ "sarl $dst, $shift" %}
11945 ins_encode %{
11946 __ sarl($dst$$Address, $shift$$constant);
11947 %}
11948 ins_pipe(ialu_mem_imm);
11949 %}
11950
11951 // Arithmetic Shift Right by variable
11952 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11953 %{
11954 predicate(!VM_Version::supports_bmi2());
11955 match(Set dst (RShiftI dst shift));
11956 effect(KILL cr);
11957
11958 format %{ "sarl $dst, $shift" %}
11959 ins_encode %{
11960 __ sarl($dst$$Register);
11961 %}
11962 ins_pipe(ialu_reg_reg);
11963 %}
11964
11965 // Arithmetic Shift Right by variable
11966 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11967 %{
11968 predicate(!VM_Version::supports_bmi2());
11969 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11970 effect(KILL cr);
11971
11972 format %{ "sarl $dst, $shift" %}
11973 ins_encode %{
11974 __ sarl($dst$$Address);
11975 %}
11976 ins_pipe(ialu_mem_reg);
11977 %}
11978
11979 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11980 %{
11981 predicate(VM_Version::supports_bmi2());
11982 match(Set dst (RShiftI src shift));
11983
11984 format %{ "sarxl $dst, $src, $shift" %}
11985 ins_encode %{
11986 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11987 %}
11988 ins_pipe(ialu_reg_reg);
11989 %}
11990
11991 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11992 %{
11993 predicate(VM_Version::supports_bmi2());
11994 match(Set dst (RShiftI (LoadI src) shift));
11995 ins_cost(175);
11996 format %{ "sarxl $dst, $src, $shift" %}
11997 ins_encode %{
11998 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11999 %}
12000 ins_pipe(ialu_reg_mem);
12001 %}
12002
12003 // Logical Shift Right by 8-bit immediate
12004 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12005 %{
12006 predicate(!UseAPX);
12007 match(Set dst (URShiftI dst shift));
12008 effect(KILL cr);
12009
12010 format %{ "shrl $dst, $shift" %}
12011 ins_encode %{
12012 __ shrl($dst$$Register, $shift$$constant);
12013 %}
12014 ins_pipe(ialu_reg);
12015 %}
12016
12017 // Logical Shift Right by 8-bit immediate
12018 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12019 %{
12020 predicate(UseAPX);
12021 match(Set dst (URShiftI src shift));
12022 effect(KILL cr);
12023
12024 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12025 ins_encode %{
12026 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12027 %}
12028 ins_pipe(ialu_reg);
12029 %}
12030
12031 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12032 %{
12033 predicate(UseAPX);
12034 match(Set dst (URShiftI (LoadI src) shift));
12035 effect(KILL cr);
12036
12037 format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
12038 ins_encode %{
12039 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12040 %}
12041 ins_pipe(ialu_reg);
12042 %}
12043
12044 // Logical Shift Right by 8-bit immediate
12045 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12046 %{
12047 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12048 effect(KILL cr);
12049
12050 format %{ "shrl $dst, $shift" %}
12051 ins_encode %{
12052 __ shrl($dst$$Address, $shift$$constant);
12053 %}
12054 ins_pipe(ialu_mem_imm);
12055 %}
12056
12057 // Logical Shift Right by variable
12058 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12059 %{
12060 predicate(!VM_Version::supports_bmi2());
12061 match(Set dst (URShiftI dst shift));
12062 effect(KILL cr);
12063
12064 format %{ "shrl $dst, $shift" %}
12065 ins_encode %{
12066 __ shrl($dst$$Register);
12067 %}
12068 ins_pipe(ialu_reg_reg);
12069 %}
12070
12071 // Logical Shift Right by variable
12072 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12073 %{
12074 predicate(!VM_Version::supports_bmi2());
12075 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12076 effect(KILL cr);
12077
12078 format %{ "shrl $dst, $shift" %}
12079 ins_encode %{
12080 __ shrl($dst$$Address);
12081 %}
12082 ins_pipe(ialu_mem_reg);
12083 %}
12084
12085 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12086 %{
12087 predicate(VM_Version::supports_bmi2());
12088 match(Set dst (URShiftI src shift));
12089
12090 format %{ "shrxl $dst, $src, $shift" %}
12091 ins_encode %{
12092 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12093 %}
12094 ins_pipe(ialu_reg_reg);
12095 %}
12096
12097 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12098 %{
12099 predicate(VM_Version::supports_bmi2());
12100 match(Set dst (URShiftI (LoadI src) shift));
12101 ins_cost(175);
12102 format %{ "shrxl $dst, $src, $shift" %}
12103 ins_encode %{
12104 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12105 %}
12106 ins_pipe(ialu_reg_mem);
12107 %}
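
// By contrast, the BMI2 sarx/shrx (and shlx below) forms take the count in
// any general register and leave EFLAGS untouched, so they declare no
// KILL cr effect and do not tie the count to RCX; both properties give the
// register allocator and the instruction scheduler more freedom.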
12108
12109 // Long Shift Instructions
12110 // Shift Left by one, two, three
12111 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12112 %{
12113 predicate(!UseAPX);
12114 match(Set dst (LShiftL dst shift));
12115 effect(KILL cr);
12116
12117 format %{ "salq $dst, $shift" %}
12118 ins_encode %{
12119 __ salq($dst$$Register, $shift$$constant);
12120 %}
12121 ins_pipe(ialu_reg);
12122 %}
12123
12124 // Shift Left by one, two, three
12125 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12126 %{
12127 predicate(UseAPX);
12128 match(Set dst (LShiftL src shift));
12129 effect(KILL cr);
12130
12131 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12132 ins_encode %{
12133 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12134 %}
12135 ins_pipe(ialu_reg);
12136 %}
12137
12138 // Shift Left by 8-bit immediate
12139 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12140 %{
12141 predicate(!UseAPX);
12142 match(Set dst (LShiftL dst shift));
12143 effect(KILL cr);
12144
12145 format %{ "salq $dst, $shift" %}
12146 ins_encode %{
12147 __ salq($dst$$Register, $shift$$constant);
12148 %}
12149 ins_pipe(ialu_reg);
12150 %}
12151
12152 // Shift Left by 8-bit immediate
12153 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12154 %{
12155 predicate(UseAPX);
12156 match(Set dst (LShiftL src shift));
12157 effect(KILL cr);
12158
12159 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12160 ins_encode %{
12161 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12162 %}
12163 ins_pipe(ialu_reg);
12164 %}
12165
12166 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12167 %{
12168 predicate(UseAPX);
12169 match(Set dst (LShiftL (LoadL src) shift));
12170 effect(KILL cr);
12171
12172 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12173 ins_encode %{
12174 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12175 %}
12176 ins_pipe(ialu_reg);
12177 %}
12178
12179 // Shift Left by 8-bit immediate
12180 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12181 %{
12182 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12183 effect(KILL cr);
12184
12185 format %{ "salq $dst, $shift" %}
12186 ins_encode %{
12187 __ salq($dst$$Address, $shift$$constant);
12188 %}
12189 ins_pipe(ialu_mem_imm);
12190 %}
12191
12192 // Shift Left by variable
12193 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12194 %{
12195 predicate(!VM_Version::supports_bmi2());
12196 match(Set dst (LShiftL dst shift));
12197 effect(KILL cr);
12198
12199 format %{ "salq $dst, $shift" %}
12200 ins_encode %{
12201 __ salq($dst$$Register);
12202 %}
12203 ins_pipe(ialu_reg_reg);
12204 %}
12205
12206 // Shift Left by variable
12207 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12208 %{
12209 predicate(!VM_Version::supports_bmi2());
12210 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12211 effect(KILL cr);
12212
12213 format %{ "salq $dst, $shift" %}
12214 ins_encode %{
12215 __ salq($dst$$Address);
12216 %}
12217 ins_pipe(ialu_mem_reg);
12218 %}
12219
12220 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12221 %{
12222 predicate(VM_Version::supports_bmi2());
12223 match(Set dst (LShiftL src shift));
12224
12225 format %{ "shlxq $dst, $src, $shift" %}
12226 ins_encode %{
12227 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12228 %}
12229 ins_pipe(ialu_reg_reg);
12230 %}
12231
12232 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12233 %{
12234 predicate(VM_Version::supports_bmi2());
12235 match(Set dst (LShiftL (LoadL src) shift));
12236 ins_cost(175);
12237 format %{ "shlxq $dst, $src, $shift" %}
12238 ins_encode %{
12239 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12240 %}
12241 ins_pipe(ialu_reg_mem);
12242 %}
12243
// Arithmetic Shift Right by constant (count is masked to 6 bits)
12245 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12246 %{
12247 predicate(!UseAPX);
12248 match(Set dst (RShiftL dst shift));
12249 effect(KILL cr);
12250
12251 format %{ "sarq $dst, $shift" %}
12252 ins_encode %{
12253 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12254 %}
  ins_pipe(ialu_reg);
12256 %}
12257
// Arithmetic Shift Right by constant (count is masked to 6 bits)
12259 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12260 %{
12261 predicate(UseAPX);
12262 match(Set dst (RShiftL src shift));
12263 effect(KILL cr);
12264
12265 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12266 ins_encode %{
12267 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12268 %}
  ins_pipe(ialu_reg);
12270 %}
12271
12272 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12273 %{
12274 predicate(UseAPX);
12275 match(Set dst (RShiftL (LoadL src) shift));
12276 effect(KILL cr);
12277
12278 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12279 ins_encode %{
12280 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12281 %}
  ins_pipe(ialu_reg);
12283 %}
12284
// Arithmetic Shift Right by constant (count is masked to 6 bits)
12286 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12287 %{
12288 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12289 effect(KILL cr);
12290
12291 format %{ "sarq $dst, $shift" %}
12292 ins_encode %{
12293 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12294 %}
12295 ins_pipe(ialu_mem_imm);
12296 %}
12297
12298 // Arithmetic Shift Right by variable
12299 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12300 %{
12301 predicate(!VM_Version::supports_bmi2());
12302 match(Set dst (RShiftL dst shift));
12303 effect(KILL cr);
12304
12305 format %{ "sarq $dst, $shift" %}
12306 ins_encode %{
12307 __ sarq($dst$$Register);
12308 %}
12309 ins_pipe(ialu_reg_reg);
12310 %}
12311
12312 // Arithmetic Shift Right by variable
12313 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12314 %{
12315 predicate(!VM_Version::supports_bmi2());
12316 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12317 effect(KILL cr);
12318
12319 format %{ "sarq $dst, $shift" %}
12320 ins_encode %{
12321 __ sarq($dst$$Address);
12322 %}
12323 ins_pipe(ialu_mem_reg);
12324 %}
12325
12326 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12327 %{
12328 predicate(VM_Version::supports_bmi2());
12329 match(Set dst (RShiftL src shift));
12330
12331 format %{ "sarxq $dst, $src, $shift" %}
12332 ins_encode %{
12333 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12334 %}
12335 ins_pipe(ialu_reg_reg);
12336 %}
12337
12338 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12339 %{
12340 predicate(VM_Version::supports_bmi2());
12341 match(Set dst (RShiftL (LoadL src) shift));
12342 ins_cost(175);
12343 format %{ "sarxq $dst, $src, $shift" %}
12344 ins_encode %{
12345 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12346 %}
12347 ins_pipe(ialu_reg_mem);
12348 %}
12349
12350 // Logical Shift Right by 8-bit immediate
12351 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12352 %{
12353 predicate(!UseAPX);
12354 match(Set dst (URShiftL dst shift));
12355 effect(KILL cr);
12356
12357 format %{ "shrq $dst, $shift" %}
12358 ins_encode %{
12359 __ shrq($dst$$Register, $shift$$constant);
12360 %}
12361 ins_pipe(ialu_reg);
12362 %}
12363
12364 // Logical Shift Right by 8-bit immediate
12365 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12366 %{
12367 predicate(UseAPX);
12368 match(Set dst (URShiftL src shift));
12369 effect(KILL cr);
12370
12371 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12372 ins_encode %{
12373 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12374 %}
12375 ins_pipe(ialu_reg);
12376 %}
12377
12378 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12379 %{
12380 predicate(UseAPX);
12381 match(Set dst (URShiftL (LoadL src) shift));
12382 effect(KILL cr);
12383
12384 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12385 ins_encode %{
12386 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12387 %}
12388 ins_pipe(ialu_reg);
12389 %}
12390
12391 // Logical Shift Right by 8-bit immediate
12392 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12393 %{
12394 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12395 effect(KILL cr);
12396
12397 format %{ "shrq $dst, $shift" %}
12398 ins_encode %{
12399 __ shrq($dst$$Address, $shift$$constant);
12400 %}
12401 ins_pipe(ialu_mem_imm);
12402 %}
12403
12404 // Logical Shift Right by variable
12405 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12406 %{
12407 predicate(!VM_Version::supports_bmi2());
12408 match(Set dst (URShiftL dst shift));
12409 effect(KILL cr);
12410
12411 format %{ "shrq $dst, $shift" %}
12412 ins_encode %{
12413 __ shrq($dst$$Register);
12414 %}
12415 ins_pipe(ialu_reg_reg);
12416 %}
12417
12418 // Logical Shift Right by variable
12419 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12420 %{
12421 predicate(!VM_Version::supports_bmi2());
12422 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12423 effect(KILL cr);
12424
12425 format %{ "shrq $dst, $shift" %}
12426 ins_encode %{
12427 __ shrq($dst$$Address);
12428 %}
12429 ins_pipe(ialu_mem_reg);
12430 %}
12431
12432 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12433 %{
12434 predicate(VM_Version::supports_bmi2());
12435 match(Set dst (URShiftL src shift));
12436
12437 format %{ "shrxq $dst, $src, $shift" %}
12438 ins_encode %{
12439 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12440 %}
12441 ins_pipe(ialu_reg_reg);
12442 %}
12443
12444 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12445 %{
12446 predicate(VM_Version::supports_bmi2());
12447 match(Set dst (URShiftL (LoadL src) shift));
12448 ins_cost(175);
12449 format %{ "shrxq $dst, $src, $shift" %}
12450 ins_encode %{
12451 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12452 %}
12453 ins_pipe(ialu_reg_mem);
12454 %}
12455
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12457 // This idiom is used by the compiler for the i2b bytecode.
12458 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12459 %{
12460 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12461
12462 format %{ "movsbl $dst, $src\t# i2b" %}
12463 ins_encode %{
12464 __ movsbl($dst$$Register, $src$$Register);
12465 %}
12466 ins_pipe(ialu_reg_reg);
12467 %}
12468
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12471 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12472 %{
12473 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12474
12475 format %{ "movswl $dst, $src\t# i2s" %}
12476 ins_encode %{
12477 __ movswl($dst$$Register, $src$$Register);
12478 %}
12479 ins_pipe(ialu_reg_reg);
12480 %}
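
// Both idioms are what C2 produces for the Java narrowing casts: (byte) x
// becomes (x << 24) >> 24 and (short) x becomes (x << 16) >> 16 in the ideal
// graph, and matching the shift pair collapses the sign extension into a
// single movsbl/movswl.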
12481
12482 // ROL/ROR instructions
12483
12484 // Rotate left by constant.
12485 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12486 %{
12487 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12488 match(Set dst (RotateLeft dst shift));
12489 effect(KILL cr);
12490 format %{ "roll $dst, $shift" %}
12491 ins_encode %{
12492 __ roll($dst$$Register, $shift$$constant);
12493 %}
12494 ins_pipe(ialu_reg);
12495 %}
12496
12497 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12498 %{
12499 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12500 match(Set dst (RotateLeft src shift));
12501 format %{ "rolxl $dst, $src, $shift" %}
12502 ins_encode %{
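    // BMI2 has no rolx: rol(x, s) == ror(x, 32 - s), counts taken mod 32.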
12503 int shift = 32 - ($shift$$constant & 31);
12504 __ rorxl($dst$$Register, $src$$Register, shift);
12505 %}
12506 ins_pipe(ialu_reg_reg);
12507 %}
12508
12509 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12510 %{
12511 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12512 match(Set dst (RotateLeft (LoadI src) shift));
12513 ins_cost(175);
12514 format %{ "rolxl $dst, $src, $shift" %}
12515 ins_encode %{
12516 int shift = 32 - ($shift$$constant & 31);
12517 __ rorxl($dst$$Register, $src$$Address, shift);
12518 %}
12519 ins_pipe(ialu_reg_mem);
12520 %}
12521
12522 // Rotate Left by variable
12523 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12524 %{
12525 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12526 match(Set dst (RotateLeft dst shift));
12527 effect(KILL cr);
12528 format %{ "roll $dst, $shift" %}
12529 ins_encode %{
12530 __ roll($dst$$Register);
12531 %}
12532 ins_pipe(ialu_reg_reg);
12533 %}
12534
12535 // Rotate Left by variable
12536 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12537 %{
12538 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12539 match(Set dst (RotateLeft src shift));
12540 effect(KILL cr);
12541
12542 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12543 ins_encode %{
12544 __ eroll($dst$$Register, $src$$Register, false);
12545 %}
12546 ins_pipe(ialu_reg_reg);
12547 %}
12548
12549 // Rotate Right by constant.
12550 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12551 %{
12552 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12553 match(Set dst (RotateRight dst shift));
12554 effect(KILL cr);
12555 format %{ "rorl $dst, $shift" %}
12556 ins_encode %{
12557 __ rorl($dst$$Register, $shift$$constant);
12558 %}
12559 ins_pipe(ialu_reg);
12560 %}
12561
12562 // Rotate Right by constant.
12563 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12564 %{
12565 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12566 match(Set dst (RotateRight src shift));
12567 format %{ "rorxl $dst, $src, $shift" %}
12568 ins_encode %{
12569 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12570 %}
12571 ins_pipe(ialu_reg_reg);
12572 %}
12573
12574 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12575 %{
12576 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12577 match(Set dst (RotateRight (LoadI src) shift));
12578 ins_cost(175);
12579 format %{ "rorxl $dst, $src, $shift" %}
12580 ins_encode %{
12581 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12582 %}
12583 ins_pipe(ialu_reg_mem);
12584 %}
12585
12586 // Rotate Right by variable
12587 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12588 %{
12589 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12590 match(Set dst (RotateRight dst shift));
12591 effect(KILL cr);
12592 format %{ "rorl $dst, $shift" %}
12593 ins_encode %{
12594 __ rorl($dst$$Register);
12595 %}
12596 ins_pipe(ialu_reg_reg);
12597 %}
12598
12599 // Rotate Right by variable
12600 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12601 %{
12602 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12603 match(Set dst (RotateRight src shift));
12604 effect(KILL cr);
12605
  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12607 ins_encode %{
12608 __ erorl($dst$$Register, $src$$Register, false);
12609 %}
12610 ins_pipe(ialu_reg_reg);
12611 %}
12612
12613 // Rotate Left by constant.
12614 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12615 %{
12616 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12617 match(Set dst (RotateLeft dst shift));
12618 effect(KILL cr);
12619 format %{ "rolq $dst, $shift" %}
12620 ins_encode %{
12621 __ rolq($dst$$Register, $shift$$constant);
12622 %}
12623 ins_pipe(ialu_reg);
12624 %}
12625
12626 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12627 %{
12628 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12629 match(Set dst (RotateLeft src shift));
12630 format %{ "rolxq $dst, $src, $shift" %}
12631 ins_encode %{
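    // BMI2 has no rolx: rol(x, s) == ror(x, 64 - s), counts taken mod 64.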
12632 int shift = 64 - ($shift$$constant & 63);
12633 __ rorxq($dst$$Register, $src$$Register, shift);
12634 %}
12635 ins_pipe(ialu_reg_reg);
12636 %}
12637
12638 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12639 %{
12640 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12641 match(Set dst (RotateLeft (LoadL src) shift));
12642 ins_cost(175);
12643 format %{ "rolxq $dst, $src, $shift" %}
12644 ins_encode %{
12645 int shift = 64 - ($shift$$constant & 63);
12646 __ rorxq($dst$$Register, $src$$Address, shift);
12647 %}
12648 ins_pipe(ialu_reg_mem);
12649 %}
12650
12651 // Rotate Left by variable
12652 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12653 %{
12654 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12655 match(Set dst (RotateLeft dst shift));
12656 effect(KILL cr);
12657 format %{ "rolq $dst, $shift" %}
12658 ins_encode %{
12659 __ rolq($dst$$Register);
12660 %}
12661 ins_pipe(ialu_reg_reg);
12662 %}
12663
12664 // Rotate Left by variable
12665 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12666 %{
12667 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12668 match(Set dst (RotateLeft src shift));
12669 effect(KILL cr);
12670
  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12672 ins_encode %{
12673 __ erolq($dst$$Register, $src$$Register, false);
12674 %}
12675 ins_pipe(ialu_reg_reg);
12676 %}
12677
12678 // Rotate Right by constant.
12679 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12680 %{
12681 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12682 match(Set dst (RotateRight dst shift));
12683 effect(KILL cr);
12684 format %{ "rorq $dst, $shift" %}
12685 ins_encode %{
12686 __ rorq($dst$$Register, $shift$$constant);
12687 %}
12688 ins_pipe(ialu_reg);
12689 %}
12690
12691 // Rotate Right by constant
12692 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12693 %{
12694 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12695 match(Set dst (RotateRight src shift));
12696 format %{ "rorxq $dst, $src, $shift" %}
12697 ins_encode %{
12698 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12699 %}
12700 ins_pipe(ialu_reg_reg);
12701 %}
12702
12703 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12704 %{
12705 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12706 match(Set dst (RotateRight (LoadL src) shift));
12707 ins_cost(175);
12708 format %{ "rorxq $dst, $src, $shift" %}
12709 ins_encode %{
12710 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12711 %}
12712 ins_pipe(ialu_reg_mem);
12713 %}
12714
12715 // Rotate Right by variable
12716 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12717 %{
12718 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12719 match(Set dst (RotateRight dst shift));
12720 effect(KILL cr);
12721 format %{ "rorq $dst, $shift" %}
12722 ins_encode %{
12723 __ rorq($dst$$Register);
12724 %}
12725 ins_pipe(ialu_reg_reg);
12726 %}
12727
12728 // Rotate Right by variable
12729 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12730 %{
12731 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12732 match(Set dst (RotateRight src shift));
12733 effect(KILL cr);
12734
  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12736 ins_encode %{
12737 __ erorq($dst$$Register, $src$$Register, false);
12738 %}
12739 ins_pipe(ialu_reg_reg);
12740 %}
12741
12742 //----------------------------- CompressBits/ExpandBits ------------------------
12743
12744 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12745 predicate(n->bottom_type()->isa_long());
12746 match(Set dst (CompressBits src mask));
12747 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12748 ins_encode %{
12749 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12750 %}
12751 ins_pipe( pipe_slow );
12752 %}
12753
12754 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12755 predicate(n->bottom_type()->isa_long());
12756 match(Set dst (ExpandBits src mask));
12757 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12758 ins_encode %{
12759 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12760 %}
12761 ins_pipe( pipe_slow );
12762 %}
12763
12764 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12765 predicate(n->bottom_type()->isa_long());
12766 match(Set dst (CompressBits src (LoadL mask)));
12767 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12768 ins_encode %{
12769 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12770 %}
12771 ins_pipe( pipe_slow );
12772 %}
12773
12774 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12775 predicate(n->bottom_type()->isa_long());
12776 match(Set dst (ExpandBits src (LoadL mask)));
12777 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12778 ins_encode %{
12779 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12780 %}
12781 ins_pipe( pipe_slow );
12782 %}
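
// These rules back the Long.compress/Long.expand intrinsics on BMI2 hardware.
// pext packs the src bits selected by mask into the low end of dst; pdep
// scatters the low src bits back out to the mask positions. For example, with
// mask == 0b1100: pext(0b1010, 0b1100) == 0b10 and
// pdep(0b10, 0b1100) == 0b1000.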
12783
12784
12785 // Logical Instructions
12786
12787 // Integer Logical Instructions
12788
12789 // And Instructions
12790 // And Register with Register
12791 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12792 %{
12793 predicate(!UseAPX);
12794 match(Set dst (AndI dst src));
12795 effect(KILL cr);
12796 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12797
12798 format %{ "andl $dst, $src\t# int" %}
12799 ins_encode %{
12800 __ andl($dst$$Register, $src$$Register);
12801 %}
12802 ins_pipe(ialu_reg_reg);
12803 %}
12804
12805 // And Register with Register using New Data Destination (NDD)
12806 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12807 %{
12808 predicate(UseAPX);
12809 match(Set dst (AndI src1 src2));
12810 effect(KILL cr);
12811 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12812
12813 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12814 ins_encode %{
12815 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12817 %}
12818 ins_pipe(ialu_reg_reg);
12819 %}
12820
12821 // And Register with Immediate 255
12822 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12823 %{
12824 match(Set dst (AndI src mask));
12825
12826 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12827 ins_encode %{
12828 __ movzbl($dst$$Register, $src$$Register);
12829 %}
12830 ins_pipe(ialu_reg);
12831 %}
12832
12833 // And Register with Immediate 255 and promote to long
12834 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12835 %{
12836 match(Set dst (ConvI2L (AndI src mask)));
12837
12838 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12839 ins_encode %{
12840 __ movzbl($dst$$Register, $src$$Register);
12841 %}
12842 ins_pipe(ialu_reg);
12843 %}
12844
12845 // And Register with Immediate 65535
12846 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12847 %{
12848 match(Set dst (AndI src mask));
12849
12850 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12851 ins_encode %{
12852 __ movzwl($dst$$Register, $src$$Register);
12853 %}
12854 ins_pipe(ialu_reg);
12855 %}
12856
12857 // And Register with Immediate 65535 and promote to long
12858 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12859 %{
12860 match(Set dst (ConvI2L (AndI src mask)));
12861
12862 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12863 ins_encode %{
12864 __ movzwl($dst$$Register, $src$$Register);
12865 %}
12866 ins_pipe(ialu_reg);
12867 %}
12868
12869 // Can skip int2long conversions after AND with small bitmask
12870 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12871 %{
12872 predicate(VM_Version::supports_bmi2());
12873 ins_cost(125);
12874 effect(TEMP tmp, KILL cr);
12875 match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12877 ins_encode %{
12878 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12879 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12880 %}
12881 ins_pipe(ialu_reg_reg);
12882 %}
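
// Worked example: for mask == 0x00FFFFFF (2^24 - 1), $tmp is loaded with 24
// and bzhiq copies src into dst with bits 63..24 cleared, performing the AND
// and the int-to-long zero extension in one instruction.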
12883
12884 // And Register with Immediate
12885 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12886 %{
12887 predicate(!UseAPX);
12888 match(Set dst (AndI dst src));
12889 effect(KILL cr);
12890 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12891
12892 format %{ "andl $dst, $src\t# int" %}
12893 ins_encode %{
12894 __ andl($dst$$Register, $src$$constant);
12895 %}
12896 ins_pipe(ialu_reg);
12897 %}
12898
12899 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12900 %{
12901 predicate(UseAPX);
12902 match(Set dst (AndI src1 src2));
12903 effect(KILL cr);
12904 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12905
12906 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12907 ins_encode %{
12908 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12909 %}
12910 ins_pipe(ialu_reg);
12911 %}
12912
12913 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12914 %{
12915 predicate(UseAPX);
12916 match(Set dst (AndI (LoadI src1) src2));
12917 effect(KILL cr);
12918 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12919
12920 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12921 ins_encode %{
12922 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12923 %}
12924 ins_pipe(ialu_reg);
12925 %}
12926
12927 // And Register with Memory
12928 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12929 %{
12930 predicate(!UseAPX);
12931 match(Set dst (AndI dst (LoadI src)));
12932 effect(KILL cr);
12933 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12934
12935 ins_cost(150);
12936 format %{ "andl $dst, $src\t# int" %}
12937 ins_encode %{
12938 __ andl($dst$$Register, $src$$Address);
12939 %}
12940 ins_pipe(ialu_reg_mem);
12941 %}
12942
12943 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12944 %{
12945 predicate(UseAPX);
12946 match(Set dst (AndI src1 (LoadI src2)));
12947 effect(KILL cr);
12948 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12949
12950 ins_cost(150);
12951 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12952 ins_encode %{
12953 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12954 %}
12955 ins_pipe(ialu_reg_mem);
12956 %}
12957
12958 // And Memory with Register
12959 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12960 %{
12961 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12962 effect(KILL cr);
12963 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12964
12965 ins_cost(150);
12966 format %{ "andb $dst, $src\t# byte" %}
12967 ins_encode %{
12968 __ andb($dst$$Address, $src$$Register);
12969 %}
12970 ins_pipe(ialu_mem_reg);
12971 %}
12972
12973 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12974 %{
12975 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12976 effect(KILL cr);
12977 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12978
12979 ins_cost(150);
12980 format %{ "andl $dst, $src\t# int" %}
12981 ins_encode %{
12982 __ andl($dst$$Address, $src$$Register);
12983 %}
12984 ins_pipe(ialu_mem_reg);
12985 %}
12986
12987 // And Memory with Immediate
12988 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12989 %{
12990 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12991 effect(KILL cr);
12992 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12993
12994 ins_cost(125);
12995 format %{ "andl $dst, $src\t# int" %}
12996 ins_encode %{
12997 __ andl($dst$$Address, $src$$constant);
12998 %}
12999 ins_pipe(ialu_mem_imm);
13000 %}
13001
13002 // BMI1 instructions
13003 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13004 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13005 predicate(UseBMI1Instructions);
13006 effect(KILL cr);
13007 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13008
13009 ins_cost(125);
13010 format %{ "andnl $dst, $src1, $src2" %}
13011
13012 ins_encode %{
13013 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13014 %}
13015 ins_pipe(ialu_reg_mem);
13016 %}
13017
13018 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13019 match(Set dst (AndI (XorI src1 minus_1) src2));
13020 predicate(UseBMI1Instructions);
13021 effect(KILL cr);
13022 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023
13024 format %{ "andnl $dst, $src1, $src2" %}
13025
13026 ins_encode %{
13027 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13028 %}
13029 ins_pipe(ialu_reg);
13030 %}
13031
13032 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13033 match(Set dst (AndI (SubI imm_zero src) src));
13034 predicate(UseBMI1Instructions);
13035 effect(KILL cr);
13036 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13037
13038 format %{ "blsil $dst, $src" %}
13039
13040 ins_encode %{
13041 __ blsil($dst$$Register, $src$$Register);
13042 %}
13043 ins_pipe(ialu_reg);
13044 %}
13045
13046 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13047 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13048 predicate(UseBMI1Instructions);
13049 effect(KILL cr);
13050 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13051
13052 ins_cost(125);
13053 format %{ "blsil $dst, $src" %}
13054
13055 ins_encode %{
13056 __ blsil($dst$$Register, $src$$Address);
13057 %}
13058 ins_pipe(ialu_reg_mem);
13059 %}
13060
13061 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13062 %{
13063 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13064 predicate(UseBMI1Instructions);
13065 effect(KILL cr);
13066 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13067
13068 ins_cost(125);
13069 format %{ "blsmskl $dst, $src" %}
13070
13071 ins_encode %{
13072 __ blsmskl($dst$$Register, $src$$Address);
13073 %}
13074 ins_pipe(ialu_reg_mem);
13075 %}
13076
13077 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13078 %{
13079 match(Set dst (XorI (AddI src minus_1) src));
13080 predicate(UseBMI1Instructions);
13081 effect(KILL cr);
13082 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13083
13084 format %{ "blsmskl $dst, $src" %}
13085
13086 ins_encode %{
13087 __ blsmskl($dst$$Register, $src$$Register);
13088 %}
13089
13090 ins_pipe(ialu_reg);
13091 %}
13092
13093 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13094 %{
13095 match(Set dst (AndI (AddI src minus_1) src) );
13096 predicate(UseBMI1Instructions);
13097 effect(KILL cr);
13098 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13099
13100 format %{ "blsrl $dst, $src" %}
13101
13102 ins_encode %{
13103 __ blsrl($dst$$Register, $src$$Register);
13104 %}
13105
  ins_pipe(ialu_reg);
13107 %}
13108
13109 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13110 %{
13111 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13112 predicate(UseBMI1Instructions);
13113 effect(KILL cr);
13114 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13115
13116 ins_cost(125);
13117 format %{ "blsrl $dst, $src" %}
13118
13119 ins_encode %{
13120 __ blsrl($dst$$Register, $src$$Address);
13121 %}
13122
  ins_pipe(ialu_reg_mem);
13124 %}
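
// The BMI1 rules above match the canonical bit-twiddling shapes, e.g. as
// written in Java source:
//   ~a & b       -> andn   (and-not)
//   x & -x       -> blsi   (isolate lowest set bit)
//   x ^ (x - 1)  -> blsmsk (mask up to and including lowest set bit)
//   x & (x - 1)  -> blsr   (clear lowest set bit)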
13125
13126 // Or Instructions
13127 // Or Register with Register
13128 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13129 %{
13130 predicate(!UseAPX);
13131 match(Set dst (OrI dst src));
13132 effect(KILL cr);
13133 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13134
13135 format %{ "orl $dst, $src\t# int" %}
13136 ins_encode %{
13137 __ orl($dst$$Register, $src$$Register);
13138 %}
13139 ins_pipe(ialu_reg_reg);
13140 %}
13141
13142 // Or Register with Register using New Data Destination (NDD)
13143 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13144 %{
13145 predicate(UseAPX);
13146 match(Set dst (OrI src1 src2));
13147 effect(KILL cr);
13148 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13149
13150 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13151 ins_encode %{
13152 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13153 %}
13154 ins_pipe(ialu_reg_reg);
13155 %}
13156
13157 // Or Register with Immediate
13158 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13159 %{
13160 predicate(!UseAPX);
13161 match(Set dst (OrI dst src));
13162 effect(KILL cr);
13163 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13164
13165 format %{ "orl $dst, $src\t# int" %}
13166 ins_encode %{
13167 __ orl($dst$$Register, $src$$constant);
13168 %}
13169 ins_pipe(ialu_reg);
13170 %}
13171
13172 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13173 %{
13174 predicate(UseAPX);
13175 match(Set dst (OrI src1 src2));
13176 effect(KILL cr);
13177 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13178
13179 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13180 ins_encode %{
13181 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13182 %}
13183 ins_pipe(ialu_reg);
13184 %}
13185
13186 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13187 %{
13188 predicate(UseAPX);
13189 match(Set dst (OrI src1 src2));
13190 effect(KILL cr);
13191 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13192
13193 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13194 ins_encode %{
13195 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13196 %}
13197 ins_pipe(ialu_reg);
13198 %}
13199
13200 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13201 %{
13202 predicate(UseAPX);
13203 match(Set dst (OrI (LoadI src1) src2));
13204 effect(KILL cr);
13205 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13206
13207 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13208 ins_encode %{
13209 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13210 %}
13211 ins_pipe(ialu_reg);
13212 %}
13213
13214 // Or Register with Memory
13215 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13216 %{
13217 predicate(!UseAPX);
13218 match(Set dst (OrI dst (LoadI src)));
13219 effect(KILL cr);
13220 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13221
13222 ins_cost(150);
13223 format %{ "orl $dst, $src\t# int" %}
13224 ins_encode %{
13225 __ orl($dst$$Register, $src$$Address);
13226 %}
13227 ins_pipe(ialu_reg_mem);
13228 %}
13229
13230 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13231 %{
13232 predicate(UseAPX);
13233 match(Set dst (OrI src1 (LoadI src2)));
13234 effect(KILL cr);
13235 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13236
13237 ins_cost(150);
13238 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13239 ins_encode %{
13240 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13241 %}
13242 ins_pipe(ialu_reg_mem);
13243 %}
13244
13245 // Or Memory with Register
13246 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13247 %{
13248 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13249 effect(KILL cr);
13250 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13251
13252 ins_cost(150);
13253 format %{ "orb $dst, $src\t# byte" %}
13254 ins_encode %{
13255 __ orb($dst$$Address, $src$$Register);
13256 %}
13257 ins_pipe(ialu_mem_reg);
13258 %}
13259
13260 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13261 %{
13262 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13263 effect(KILL cr);
13264 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13265
13266 ins_cost(150);
13267 format %{ "orl $dst, $src\t# int" %}
13268 ins_encode %{
13269 __ orl($dst$$Address, $src$$Register);
13270 %}
13271 ins_pipe(ialu_mem_reg);
13272 %}
13273
13274 // Or Memory with Immediate
13275 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13276 %{
13277 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13278 effect(KILL cr);
13279 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13280
13281 ins_cost(125);
13282 format %{ "orl $dst, $src\t# int" %}
13283 ins_encode %{
13284 __ orl($dst$$Address, $src$$constant);
13285 %}
13286 ins_pipe(ialu_mem_imm);
13287 %}
13288
13289 // Xor Instructions
13290 // Xor Register with Register
13291 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13292 %{
13293 predicate(!UseAPX);
13294 match(Set dst (XorI dst src));
13295 effect(KILL cr);
13296 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13297
13298 format %{ "xorl $dst, $src\t# int" %}
13299 ins_encode %{
13300 __ xorl($dst$$Register, $src$$Register);
13301 %}
13302 ins_pipe(ialu_reg_reg);
13303 %}
13304
13305 // Xor Register with Register using New Data Destination (NDD)
13306 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13307 %{
13308 predicate(UseAPX);
13309 match(Set dst (XorI src1 src2));
13310 effect(KILL cr);
13311 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13312
13313 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13314 ins_encode %{
13315 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13316 %}
13317 ins_pipe(ialu_reg_reg);
13318 %}
13319
13320 // Xor Register with Immediate -1
13321 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13322 %{
13323 predicate(!UseAPX);
13324 match(Set dst (XorI dst imm));
13325
13326 format %{ "notl $dst" %}
13327 ins_encode %{
13328 __ notl($dst$$Register);
13329 %}
13330 ins_pipe(ialu_reg);
13331 %}
13332
13333 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13334 %{
13335 match(Set dst (XorI src imm));
13336 predicate(UseAPX);
13337
13338 format %{ "enotl $dst, $src" %}
13339 ins_encode %{
13340 __ enotl($dst$$Register, $src$$Register);
13341 %}
13342 ins_pipe(ialu_reg);
13343 %}
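
// Both rules exploit x ^ -1 == ~x: javac compiles ~x to an xor with -1, and
// matching the constant here turns it back into notl/enotl, which (unlike
// xorl) leaves the flags unchanged, hence no KILL cr effect.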
13344
13345 // Xor Register with Immediate
13346 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13347 %{
  // Exclude the -1 case so xorI_rReg_im1 (notl) is always chosen for it, regardless of cost.
13349 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13350 match(Set dst (XorI dst src));
13351 effect(KILL cr);
13352 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13353
13354 format %{ "xorl $dst, $src\t# int" %}
13355 ins_encode %{
13356 __ xorl($dst$$Register, $src$$constant);
13357 %}
13358 ins_pipe(ialu_reg);
13359 %}
13360
13361 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13362 %{
  // Exclude the -1 case so xorI_rReg_im1_ndd (enotl) is always chosen for it, regardless of cost.
13364 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13365 match(Set dst (XorI src1 src2));
13366 effect(KILL cr);
13367 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13368
13369 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13370 ins_encode %{
13371 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13372 %}
13373 ins_pipe(ialu_reg);
13374 %}
13375
// Xor Memory with Immediate into Register
13377 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13378 %{
13379 predicate(UseAPX);
13380 match(Set dst (XorI (LoadI src1) src2));
13381 effect(KILL cr);
13382 ins_cost(150);
13383 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13384
13385 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13386 ins_encode %{
13387 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13388 %}
13389 ins_pipe(ialu_reg);
13390 %}
13391
13392 // Xor Register with Memory
13393 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13394 %{
13395 predicate(!UseAPX);
13396 match(Set dst (XorI dst (LoadI src)));
13397 effect(KILL cr);
13398 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13399
13400 ins_cost(150);
13401 format %{ "xorl $dst, $src\t# int" %}
13402 ins_encode %{
13403 __ xorl($dst$$Register, $src$$Address);
13404 %}
13405 ins_pipe(ialu_reg_mem);
13406 %}
13407
13408 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13409 %{
13410 predicate(UseAPX);
13411 match(Set dst (XorI src1 (LoadI src2)));
13412 effect(KILL cr);
13413 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414
13415 ins_cost(150);
13416 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13417 ins_encode %{
13418 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13419 %}
13420 ins_pipe(ialu_reg_mem);
13421 %}
13422
13423 // Xor Memory with Register
13424 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13425 %{
13426 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13427 effect(KILL cr);
13428 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13429
13430 ins_cost(150);
13431 format %{ "xorb $dst, $src\t# byte" %}
13432 ins_encode %{
13433 __ xorb($dst$$Address, $src$$Register);
13434 %}
13435 ins_pipe(ialu_mem_reg);
13436 %}
13437
13438 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13439 %{
13440 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13441 effect(KILL cr);
13442 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443
13444 ins_cost(150);
13445 format %{ "xorl $dst, $src\t# int" %}
13446 ins_encode %{
13447 __ xorl($dst$$Address, $src$$Register);
13448 %}
13449 ins_pipe(ialu_mem_reg);
13450 %}
13451
13452 // Xor Memory with Immediate
13453 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13454 %{
13455 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13456 effect(KILL cr);
13457 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13458
13459 ins_cost(125);
13460 format %{ "xorl $dst, $src\t# int" %}
13461 ins_encode %{
13462 __ xorl($dst$$Address, $src$$constant);
13463 %}
13464 ins_pipe(ialu_mem_imm);
13465 %}
13466
13467
13468 // Long Logical Instructions
13469
13470 // And Instructions
13471 // And Register with Register
13472 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13473 %{
13474 predicate(!UseAPX);
13475 match(Set dst (AndL dst src));
13476 effect(KILL cr);
13477 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13478
13479 format %{ "andq $dst, $src\t# long" %}
13480 ins_encode %{
13481 __ andq($dst$$Register, $src$$Register);
13482 %}
13483 ins_pipe(ialu_reg_reg);
13484 %}
13485
13486 // And Register with Register using New Data Destination (NDD)
13487 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13488 %{
13489 predicate(UseAPX);
13490 match(Set dst (AndL src1 src2));
13491 effect(KILL cr);
13492 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13493
13494 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13495 ins_encode %{
13496 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13498 %}
13499 ins_pipe(ialu_reg_reg);
13500 %}
13501
13502 // And Register with Immediate 255
13503 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13504 %{
13505 match(Set dst (AndL src mask));
13506
13507 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13508 ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13510 __ movzbl($dst$$Register, $src$$Register);
13511 %}
13512 ins_pipe(ialu_reg);
13513 %}
13514
13515 // And Register with Immediate 65535
13516 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13517 %{
13518 match(Set dst (AndL src mask));
13519
13520 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13521 ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13523 __ movzwl($dst$$Register, $src$$Register);
13524 %}
13525 ins_pipe(ialu_reg);
13526 %}
13527
13528 // And Register with Immediate
13529 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13530 %{
13531 predicate(!UseAPX);
13532 match(Set dst (AndL dst src));
13533 effect(KILL cr);
13534 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13535
13536 format %{ "andq $dst, $src\t# long" %}
13537 ins_encode %{
13538 __ andq($dst$$Register, $src$$constant);
13539 %}
13540 ins_pipe(ialu_reg);
13541 %}
13542
13543 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13544 %{
13545 predicate(UseAPX);
13546 match(Set dst (AndL src1 src2));
13547 effect(KILL cr);
13548 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13549
13550 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13551 ins_encode %{
13552 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13553 %}
13554 ins_pipe(ialu_reg);
13555 %}
13556
13557 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13558 %{
13559 predicate(UseAPX);
13560 match(Set dst (AndL (LoadL src1) src2));
13561 effect(KILL cr);
13562 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13563
13564 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13565 ins_encode %{
13566 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13567 %}
13568 ins_pipe(ialu_reg);
13569 %}
13570
13571 // And Register with Memory
13572 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13573 %{
13574 predicate(!UseAPX);
13575 match(Set dst (AndL dst (LoadL src)));
13576 effect(KILL cr);
13577 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13578
13579 ins_cost(150);
13580 format %{ "andq $dst, $src\t# long" %}
13581 ins_encode %{
13582 __ andq($dst$$Register, $src$$Address);
13583 %}
13584 ins_pipe(ialu_reg_mem);
13585 %}
13586
13587 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13588 %{
13589 predicate(UseAPX);
13590 match(Set dst (AndL src1 (LoadL src2)));
13591 effect(KILL cr);
13592 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13593
13594 ins_cost(150);
13595 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13596 ins_encode %{
13597 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13598 %}
13599 ins_pipe(ialu_reg_mem);
13600 %}
13601
13602 // And Memory with Register
13603 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13604 %{
13605 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13606 effect(KILL cr);
13607 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13608
13609 ins_cost(150);
13610 format %{ "andq $dst, $src\t# long" %}
13611 ins_encode %{
13612 __ andq($dst$$Address, $src$$Register);
13613 %}
13614 ins_pipe(ialu_mem_reg);
13615 %}
13616
13617 // And Memory with Immediate
13618 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13619 %{
13620 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13621 effect(KILL cr);
13622 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13623
13624 ins_cost(125);
13625 format %{ "andq $dst, $src\t# long" %}
13626 ins_encode %{
13627 __ andq($dst$$Address, $src$$constant);
13628 %}
13629 ins_pipe(ialu_mem_imm);
13630 %}
13631
13632 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13633 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2;
  // masks that fit in 8 or 32 bits are already handled well by plain AND/OR.
13636 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13637
13638 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13639 effect(KILL cr);
13640
13641 ins_cost(125);
13642 format %{ "btrq $dst, log2(not($con))\t# long" %}
13643 ins_encode %{
13644 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13645 %}
13646 ins_pipe(ialu_mem_imm);
13647 %}
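
// Worked example: to clear bit 40, con == ~(1L << 40) == 0xFFFFFEFFFFFFFFFF;
// not(con) is a power of two with log2 == 40 > 30, so this emits
// btrq [dst], 40 instead of materializing the 64-bit mask in a register.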
13648
13649 // BMI1 instructions
13650 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13651 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13652 predicate(UseBMI1Instructions);
13653 effect(KILL cr);
13654 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13655
13656 ins_cost(125);
13657 format %{ "andnq $dst, $src1, $src2" %}
13658
13659 ins_encode %{
13660 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13661 %}
13662 ins_pipe(ialu_reg_mem);
13663 %}
13664
13665 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13666 match(Set dst (AndL (XorL src1 minus_1) src2));
13667 predicate(UseBMI1Instructions);
13668 effect(KILL cr);
13669 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13670
13671 format %{ "andnq $dst, $src1, $src2" %}
13672
13673 ins_encode %{
13674 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13675 %}
  ins_pipe(ialu_reg);
13677 %}
13678
13679 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13680 match(Set dst (AndL (SubL imm_zero src) src));
13681 predicate(UseBMI1Instructions);
13682 effect(KILL cr);
13683 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13684
13685 format %{ "blsiq $dst, $src" %}
13686
13687 ins_encode %{
13688 __ blsiq($dst$$Register, $src$$Register);
13689 %}
13690 ins_pipe(ialu_reg);
13691 %}
13692
13693 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13694 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13695 predicate(UseBMI1Instructions);
13696 effect(KILL cr);
13697 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13698
13699 ins_cost(125);
13700 format %{ "blsiq $dst, $src" %}
13701
13702 ins_encode %{
13703 __ blsiq($dst$$Register, $src$$Address);
13704 %}
13705 ins_pipe(ialu_reg_mem);
13706 %}
13707
13708 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13709 %{
13710 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13711 predicate(UseBMI1Instructions);
13712 effect(KILL cr);
13713 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13714
13715 ins_cost(125);
13716 format %{ "blsmskq $dst, $src" %}
13717
13718 ins_encode %{
13719 __ blsmskq($dst$$Register, $src$$Address);
13720 %}
13721 ins_pipe(ialu_reg_mem);
13722 %}
13723
13724 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13725 %{
13726 match(Set dst (XorL (AddL src minus_1) src));
13727 predicate(UseBMI1Instructions);
13728 effect(KILL cr);
13729 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13730
13731 format %{ "blsmskq $dst, $src" %}
13732
13733 ins_encode %{
13734 __ blsmskq($dst$$Register, $src$$Register);
13735 %}
13736
13737 ins_pipe(ialu_reg);
13738 %}
13739
13740 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13741 %{
13742 match(Set dst (AndL (AddL src minus_1) src) );
13743 predicate(UseBMI1Instructions);
13744 effect(KILL cr);
13745 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13746
13747 format %{ "blsrq $dst, $src" %}
13748
13749 ins_encode %{
13750 __ blsrq($dst$$Register, $src$$Register);
13751 %}
13752
13753 ins_pipe(ialu_reg);
13754 %}
13755
13756 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13757 %{
13758 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13759 predicate(UseBMI1Instructions);
13760 effect(KILL cr);
13761 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13762
13763 ins_cost(125);
13764 format %{ "blsrq $dst, $src" %}
13765
13766 ins_encode %{
13767 __ blsrq($dst$$Register, $src$$Address);
13768 %}
13769
  ins_pipe(ialu_reg_mem);
13771 %}
13772
13773 // Or Instructions
13774 // Or Register with Register
13775 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13776 %{
13777 predicate(!UseAPX);
13778 match(Set dst (OrL dst src));
13779 effect(KILL cr);
13780 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13781
13782 format %{ "orq $dst, $src\t# long" %}
13783 ins_encode %{
13784 __ orq($dst$$Register, $src$$Register);
13785 %}
13786 ins_pipe(ialu_reg_reg);
13787 %}
13788
13789 // Or Register with Register using New Data Destination (NDD)
13790 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13791 %{
13792 predicate(UseAPX);
13793 match(Set dst (OrL src1 src2));
13794 effect(KILL cr);
13795 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13796
13797 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13798 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13802 ins_pipe(ialu_reg_reg);
13803 %}
13804
13805 // Use any_RegP to match R15 (TLS register) without spilling.
13806 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13807 match(Set dst (OrL dst (CastP2X src)));
13808 effect(KILL cr);
13809 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13810
13811 format %{ "orq $dst, $src\t# long" %}
13812 ins_encode %{
13813 __ orq($dst$$Register, $src$$Register);
13814 %}
13815 ins_pipe(ialu_reg_reg);
13816 %}
13817
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13820 effect(KILL cr);
13821 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13822
13823 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13824 ins_encode %{
13825 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13826 %}
13827 ins_pipe(ialu_reg_reg);
13828 %}
13829
13830 // Or Register with Immediate
13831 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13832 %{
13833 predicate(!UseAPX);
13834 match(Set dst (OrL dst src));
13835 effect(KILL cr);
13836 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13837
13838 format %{ "orq $dst, $src\t# long" %}
13839 ins_encode %{
13840 __ orq($dst$$Register, $src$$constant);
13841 %}
13842 ins_pipe(ialu_reg);
13843 %}
13844
13845 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13846 %{
13847 predicate(UseAPX);
13848 match(Set dst (OrL src1 src2));
13849 effect(KILL cr);
13850 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13851
13852 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13853 ins_encode %{
13854 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13855 %}
13856 ins_pipe(ialu_reg);
13857 %}
13858
13859 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13860 %{
13861 predicate(UseAPX);
13862 match(Set dst (OrL src1 src2));
13863 effect(KILL cr);
13864 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13865
13866 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13867 ins_encode %{
13868 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13869 %}
13870 ins_pipe(ialu_reg);
13871 %}
13872
// Or Memory with Immediate into Register (NDD)
13874 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13875 %{
13876 predicate(UseAPX);
13877 match(Set dst (OrL (LoadL src1) src2));
13878 effect(KILL cr);
13879 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13880
13881 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13882 ins_encode %{
13883 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13884 %}
13885 ins_pipe(ialu_reg);
13886 %}
13887
13888 // Or Register with Memory
13889 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13890 %{
13891 predicate(!UseAPX);
13892 match(Set dst (OrL dst (LoadL src)));
13893 effect(KILL cr);
13894 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13895
13896 ins_cost(150);
13897 format %{ "orq $dst, $src\t# long" %}
13898 ins_encode %{
13899 __ orq($dst$$Register, $src$$Address);
13900 %}
13901 ins_pipe(ialu_reg_mem);
13902 %}
13903
13904 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13905 %{
13906 predicate(UseAPX);
13907 match(Set dst (OrL src1 (LoadL src2)));
13908 effect(KILL cr);
13909 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13910
13911 ins_cost(150);
13912 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13913 ins_encode %{
13914 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13915 %}
13916 ins_pipe(ialu_reg_mem);
13917 %}
13918
13919 // Or Memory with Register
13920 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13921 %{
13922 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13923 effect(KILL cr);
13924 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13925
13926 ins_cost(150);
13927 format %{ "orq $dst, $src\t# long" %}
13928 ins_encode %{
13929 __ orq($dst$$Address, $src$$Register);
13930 %}
13931 ins_pipe(ialu_mem_reg);
13932 %}
13933
13934 // Or Memory with Immediate
13935 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13936 %{
13937 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13938 effect(KILL cr);
13939 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13940
13941 ins_cost(125);
13942 format %{ "orq $dst, $src\t# long" %}
13943 ins_encode %{
13944 __ orq($dst$$Address, $src$$constant);
13945 %}
13946 ins_pipe(ialu_mem_imm);
13947 %}
13948
13949 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13950 %{
  // con must be a pure 64-bit power of 2 immediate with its bit above bit 31,
  // because a plain OR with a sign-extended 32-bit immediate already covers
  // the 8/32-bit cases.
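  // In the matched tree, n is the StoreL: n->in(3) is the OrL value input
  // and n->in(3)->in(2) its constant operand, inspected by the predicate.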
13953 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13954
13955 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13956 effect(KILL cr);
13957
13958 ins_cost(125);
13959 format %{ "btsq $dst, log2($con)\t# long" %}
13960 ins_encode %{
13961 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13962 %}
13963 ins_pipe(ialu_mem_imm);
13964 %}
13965
13966 // Xor Instructions
13967 // Xor Register with Register
13968 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13969 %{
13970 predicate(!UseAPX);
13971 match(Set dst (XorL dst src));
13972 effect(KILL cr);
13973 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13974
13975 format %{ "xorq $dst, $src\t# long" %}
13976 ins_encode %{
13977 __ xorq($dst$$Register, $src$$Register);
13978 %}
13979 ins_pipe(ialu_reg_reg);
13980 %}
13981
13982 // Xor Register with Register using New Data Destination (NDD)
13983 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13984 %{
13985 predicate(UseAPX);
13986 match(Set dst (XorL src1 src2));
13987 effect(KILL cr);
13988 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13989
13990 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
13991 ins_encode %{
13992 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13993 %}
13994 ins_pipe(ialu_reg_reg);
13995 %}
13996
13997 // Xor Register with Immediate -1
13998 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
13999 %{
14000 predicate(!UseAPX);
14001 match(Set dst (XorL dst imm));
14002
14003 format %{ "notq $dst" %}
14004 ins_encode %{
14005 __ notq($dst$$Register);
14006 %}
14007 ins_pipe(ialu_reg);
14008 %}
14009
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14011 %{
14012 predicate(UseAPX);
14013 match(Set dst (XorL src imm));
14014
14015 format %{ "enotq $dst, $src" %}
14016 ins_encode %{
14017 __ enotq($dst$$Register, $src$$Register);
14018 %}
14019 ins_pipe(ialu_reg);
14020 %}
14021
14022 // Xor Register with Immediate
14023 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14024 %{
  // Exclude -1 here so that xorL_rReg_im1 is selected for it regardless of cost.
14026 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14027 match(Set dst (XorL dst src));
14028 effect(KILL cr);
14029 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14030
14031 format %{ "xorq $dst, $src\t# long" %}
14032 ins_encode %{
14033 __ xorq($dst$$Register, $src$$constant);
14034 %}
14035 ins_pipe(ialu_reg);
14036 %}
14037
14038 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14039 %{
  // Exclude -1 here so that xorL_rReg_im1_ndd is selected for it regardless of cost.
14041 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14042 match(Set dst (XorL src1 src2));
14043 effect(KILL cr);
14044 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14045
14046 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14047 ins_encode %{
14048 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14049 %}
14050 ins_pipe(ialu_reg);
14051 %}
14052
// Xor Memory with Immediate into Register (NDD)
14054 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14055 %{
14056 predicate(UseAPX);
14057 match(Set dst (XorL (LoadL src1) src2));
14058 effect(KILL cr);
14059 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14060 ins_cost(150);
14061
14062 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14063 ins_encode %{
14064 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14065 %}
14066 ins_pipe(ialu_reg);
14067 %}
14068
14069 // Xor Register with Memory
14070 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14071 %{
14072 predicate(!UseAPX);
14073 match(Set dst (XorL dst (LoadL src)));
14074 effect(KILL cr);
14075 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14076
14077 ins_cost(150);
14078 format %{ "xorq $dst, $src\t# long" %}
14079 ins_encode %{
14080 __ xorq($dst$$Register, $src$$Address);
14081 %}
14082 ins_pipe(ialu_reg_mem);
14083 %}
14084
14085 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14086 %{
14087 predicate(UseAPX);
14088 match(Set dst (XorL src1 (LoadL src2)));
14089 effect(KILL cr);
14090 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14091
14092 ins_cost(150);
14093 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14094 ins_encode %{
14095 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14096 %}
14097 ins_pipe(ialu_reg_mem);
14098 %}
14099
14100 // Xor Memory with Register
14101 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14102 %{
14103 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14104 effect(KILL cr);
14105 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14106
14107 ins_cost(150);
14108 format %{ "xorq $dst, $src\t# long" %}
14109 ins_encode %{
14110 __ xorq($dst$$Address, $src$$Register);
14111 %}
14112 ins_pipe(ialu_mem_reg);
14113 %}
14114
14115 // Xor Memory with Immediate
14116 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14117 %{
14118 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14119 effect(KILL cr);
14120 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14121
14122 ins_cost(125);
14123 format %{ "xorq $dst, $src\t# long" %}
14124 ins_encode %{
14125 __ xorq($dst$$Address, $src$$constant);
14126 %}
14127 ins_pipe(ialu_mem_imm);
14128 %}
14129
14130 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14131 %{
14132 match(Set dst (CmpLTMask p q));
14133 effect(KILL cr);
14134
14135 ins_cost(400);
14136 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14137 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14138 "negl $dst" %}
14139 ins_encode %{
14140 __ cmpl($p$$Register, $q$$Register);
14141 __ setcc(Assembler::less, $dst$$Register);
14142 __ negl($dst$$Register);
14143 %}
14144 ins_pipe(pipe_slow);
14145 %}
14146
14147 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14148 %{
14149 match(Set dst (CmpLTMask dst zero));
14150 effect(KILL cr);
14151
14152 ins_cost(100);
14153 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14154 ins_encode %{
14155 __ sarl($dst$$Register, 31);
14156 %}
14157 ins_pipe(ialu_reg);
14158 %}
14159
// Better to save a register than avoid a branch.
14161 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14162 %{
14163 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14164 effect(KILL cr);
14165 ins_cost(300);
14166 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14167 "jge done\n\t"
14168 "addl $p,$y\n"
14169 "done: " %}
14170 ins_encode %{
14171 Register Rp = $p$$Register;
14172 Register Rq = $q$$Register;
14173 Register Ry = $y$$Register;
14174 Label done;
14175 __ subl(Rp, Rq);
14176 __ jccb(Assembler::greaterEqual, done);
14177 __ addl(Rp, Ry);
14178 __ bind(done);
14179 %}
14180 ins_pipe(pipe_cmplt);
14181 %}
14182
// Better to save a register than avoid a branch.
14184 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14185 %{
14186 match(Set y (AndI (CmpLTMask p q) y));
14187 effect(KILL cr);
14188
14189 ins_cost(300);
14190
14191 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14192 "jlt done\n\t"
14193 "xorl $y, $y\n"
14194 "done: " %}
14195 ins_encode %{
14196 Register Rp = $p$$Register;
14197 Register Rq = $q$$Register;
14198 Register Ry = $y$$Register;
14199 Label done;
14200 __ cmpl(Rp, Rq);
14201 __ jccb(Assembler::less, done);
14202 __ xorl(Ry, Ry);
14203 __ bind(done);
14204 %}
14205 ins_pipe(pipe_cmplt);
14206 %}
14207
14208
14209 //---------- FP Instructions------------------------------------------------
14210
14211 // Really expensive, avoid
14212 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14213 %{
14214 match(Set cr (CmpF src1 src2));
14215
14216 ins_cost(500);
14217 format %{ "ucomiss $src1, $src2\n\t"
14218 "jnp,s exit\n\t"
14219 "pushfq\t# saw NaN, set CF\n\t"
14220 "andq [rsp], #0xffffff2b\n\t"
14221 "popfq\n"
14222 "exit:" %}
14223 ins_encode %{
14224 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14225 emit_cmpfp_fixup(masm);
14226 %}
14227 ins_pipe(pipe_slow);
14228 %}
14229
14230 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14231 match(Set cr (CmpF src1 src2));
14232
14233 ins_cost(100);
14234 format %{ "ucomiss $src1, $src2" %}
14235 ins_encode %{
14236 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14237 %}
14238 ins_pipe(pipe_slow);
14239 %}
14240
14241 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14242 match(Set cr (CmpF src1 (LoadF src2)));
14243
14244 ins_cost(100);
14245 format %{ "ucomiss $src1, $src2" %}
14246 ins_encode %{
14247 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14248 %}
14249 ins_pipe(pipe_slow);
14250 %}
14251
14252 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14253 match(Set cr (CmpF src con));
14254 ins_cost(100);
14255 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14256 ins_encode %{
14257 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14258 %}
14259 ins_pipe(pipe_slow);
14260 %}
14261
14262 // Really expensive, avoid
14263 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14264 %{
14265 match(Set cr (CmpD src1 src2));
14266
14267 ins_cost(500);
14268 format %{ "ucomisd $src1, $src2\n\t"
14269 "jnp,s exit\n\t"
14270 "pushfq\t# saw NaN, set CF\n\t"
14271 "andq [rsp], #0xffffff2b\n\t"
14272 "popfq\n"
14273 "exit:" %}
14274 ins_encode %{
14275 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14276 emit_cmpfp_fixup(masm);
14277 %}
14278 ins_pipe(pipe_slow);
14279 %}
14280
14281 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14282 match(Set cr (CmpD src1 src2));
14283
14284 ins_cost(100);
14285 format %{ "ucomisd $src1, $src2 test" %}
14286 ins_encode %{
14287 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14288 %}
14289 ins_pipe(pipe_slow);
14290 %}
14291
14292 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14293 match(Set cr (CmpD src1 (LoadD src2)));
14294
14295 ins_cost(100);
14296 format %{ "ucomisd $src1, $src2" %}
14297 ins_encode %{
14298 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14299 %}
14300 ins_pipe(pipe_slow);
14301 %}
14302
14303 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14304 match(Set cr (CmpD src con));
14305 ins_cost(100);
14306 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14307 ins_encode %{
14308 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14309 %}
14310 ins_pipe(pipe_slow);
14311 %}
14312
14313 // Compare into -1,0,1
14314 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14315 %{
14316 match(Set dst (CmpF3 src1 src2));
14317 effect(KILL cr);
14318
14319 ins_cost(275);
14320 format %{ "ucomiss $src1, $src2\n\t"
14321 "movl $dst, #-1\n\t"
14322 "jp,s done\n\t"
14323 "jb,s done\n\t"
14324 "setne $dst\n\t"
14325 "movzbl $dst, $dst\n"
14326 "done:" %}
14327 ins_encode %{
14328 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14329 emit_cmpfp3(masm, $dst$$Register);
14330 %}
14331 ins_pipe(pipe_slow);
14332 %}
14333
14334 // Compare into -1,0,1
14335 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14336 %{
14337 match(Set dst (CmpF3 src1 (LoadF src2)));
14338 effect(KILL cr);
14339
14340 ins_cost(275);
14341 format %{ "ucomiss $src1, $src2\n\t"
14342 "movl $dst, #-1\n\t"
14343 "jp,s done\n\t"
14344 "jb,s done\n\t"
14345 "setne $dst\n\t"
14346 "movzbl $dst, $dst\n"
14347 "done:" %}
14348 ins_encode %{
14349 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14350 emit_cmpfp3(masm, $dst$$Register);
14351 %}
14352 ins_pipe(pipe_slow);
14353 %}
14354
14355 // Compare into -1,0,1
14356 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14357 match(Set dst (CmpF3 src con));
14358 effect(KILL cr);
14359
14360 ins_cost(275);
14361 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14362 "movl $dst, #-1\n\t"
14363 "jp,s done\n\t"
14364 "jb,s done\n\t"
14365 "setne $dst\n\t"
14366 "movzbl $dst, $dst\n"
14367 "done:" %}
14368 ins_encode %{
14369 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14370 emit_cmpfp3(masm, $dst$$Register);
14371 %}
14372 ins_pipe(pipe_slow);
14373 %}
14374
14375 // Compare into -1,0,1
14376 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14377 %{
14378 match(Set dst (CmpD3 src1 src2));
14379 effect(KILL cr);
14380
14381 ins_cost(275);
14382 format %{ "ucomisd $src1, $src2\n\t"
14383 "movl $dst, #-1\n\t"
14384 "jp,s done\n\t"
14385 "jb,s done\n\t"
14386 "setne $dst\n\t"
14387 "movzbl $dst, $dst\n"
14388 "done:" %}
14389 ins_encode %{
14390 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14391 emit_cmpfp3(masm, $dst$$Register);
14392 %}
14393 ins_pipe(pipe_slow);
14394 %}
14395
14396 // Compare into -1,0,1
14397 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14398 %{
14399 match(Set dst (CmpD3 src1 (LoadD src2)));
14400 effect(KILL cr);
14401
14402 ins_cost(275);
14403 format %{ "ucomisd $src1, $src2\n\t"
14404 "movl $dst, #-1\n\t"
14405 "jp,s done\n\t"
14406 "jb,s done\n\t"
14407 "setne $dst\n\t"
14408 "movzbl $dst, $dst\n"
14409 "done:" %}
14410 ins_encode %{
14411 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14412 emit_cmpfp3(masm, $dst$$Register);
14413 %}
14414 ins_pipe(pipe_slow);
14415 %}
14416
14417 // Compare into -1,0,1
14418 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14419 match(Set dst (CmpD3 src con));
14420 effect(KILL cr);
14421
14422 ins_cost(275);
14423 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14424 "movl $dst, #-1\n\t"
14425 "jp,s done\n\t"
14426 "jb,s done\n\t"
14427 "setne $dst\n\t"
14428 "movzbl $dst, $dst\n"
14429 "done:" %}
14430 ins_encode %{
14431 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14432 emit_cmpfp3(masm, $dst$$Register);
14433 %}
14434 ins_pipe(pipe_slow);
14435 %}
14436
14437 //----------Arithmetic Conversion Instructions---------------------------------
14438
14439 instruct convF2D_reg_reg(regD dst, regF src)
14440 %{
14441 match(Set dst (ConvF2D src));
14442
14443 format %{ "cvtss2sd $dst, $src" %}
14444 ins_encode %{
14445 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14446 %}
14447 ins_pipe(pipe_slow); // XXX
14448 %}
14449
14450 instruct convF2D_reg_mem(regD dst, memory src)
14451 %{
14452 predicate(UseAVX == 0);
14453 match(Set dst (ConvF2D (LoadF src)));
14454
14455 format %{ "cvtss2sd $dst, $src" %}
14456 ins_encode %{
14457 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14458 %}
14459 ins_pipe(pipe_slow); // XXX
14460 %}
14461
14462 instruct convD2F_reg_reg(regF dst, regD src)
14463 %{
14464 match(Set dst (ConvD2F src));
14465
14466 format %{ "cvtsd2ss $dst, $src" %}
14467 ins_encode %{
14468 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14469 %}
14470 ins_pipe(pipe_slow); // XXX
14471 %}
14472
14473 instruct convD2F_reg_mem(regF dst, memory src)
14474 %{
14475 predicate(UseAVX == 0);
14476 match(Set dst (ConvD2F (LoadD src)));
14477
14478 format %{ "cvtsd2ss $dst, $src" %}
14479 ins_encode %{
14480 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14481 %}
14482 ins_pipe(pipe_slow); // XXX
14483 %}
14484
14485 // XXX do mem variants
14486 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14487 %{
14488 predicate(!VM_Version::supports_avx10_2());
14489 match(Set dst (ConvF2I src));
14490 effect(KILL cr);
14491 format %{ "convert_f2i $dst, $src" %}
14492 ins_encode %{
14493 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14494 %}
14495 ins_pipe(pipe_slow);
14496 %}
14497
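// The AVX10.2 forms below use the saturating evcvttss2sis*/evcvttsd2sis*
// converts, which are defined to produce the Java-style results directly
// (NaN converts to 0, out-of-range values clamp to the type bounds), so no
// fixup branch and hence no flags kill is needed.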
14498 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14499 %{
14500 predicate(VM_Version::supports_avx10_2());
14501 match(Set dst (ConvF2I src));
14502 format %{ "evcvttss2sisl $dst, $src" %}
14503 ins_encode %{
14504 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14505 %}
14506 ins_pipe(pipe_slow);
14507 %}
14508
14509 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14510 %{
14511 predicate(VM_Version::supports_avx10_2());
14512 match(Set dst (ConvF2I (LoadF src)));
14513 format %{ "evcvttss2sisl $dst, $src" %}
14514 ins_encode %{
14515 __ evcvttss2sisl($dst$$Register, $src$$Address);
14516 %}
14517 ins_pipe(pipe_slow);
14518 %}
14519
14520 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14521 %{
14522 predicate(!VM_Version::supports_avx10_2());
14523 match(Set dst (ConvF2L src));
14524 effect(KILL cr);
14525 format %{ "convert_f2l $dst, $src"%}
14526 ins_encode %{
14527 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14528 %}
14529 ins_pipe(pipe_slow);
14530 %}
14531
14532 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14533 %{
14534 predicate(VM_Version::supports_avx10_2());
14535 match(Set dst (ConvF2L src));
14536 format %{ "evcvttss2sisq $dst, $src" %}
14537 ins_encode %{
14538 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14539 %}
14540 ins_pipe(pipe_slow);
14541 %}
14542
14543 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14544 %{
14545 predicate(VM_Version::supports_avx10_2());
14546 match(Set dst (ConvF2L (LoadF src)));
14547 format %{ "evcvttss2sisq $dst, $src" %}
14548 ins_encode %{
14549 __ evcvttss2sisq($dst$$Register, $src$$Address);
14550 %}
14551 ins_pipe(pipe_slow);
14552 %}
14553
14554 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14555 %{
14556 predicate(!VM_Version::supports_avx10_2());
14557 match(Set dst (ConvD2I src));
14558 effect(KILL cr);
14559 format %{ "convert_d2i $dst, $src"%}
14560 ins_encode %{
14561 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14562 %}
14563 ins_pipe(pipe_slow);
14564 %}
14565
14566 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14567 %{
14568 predicate(VM_Version::supports_avx10_2());
14569 match(Set dst (ConvD2I src));
14570 format %{ "evcvttsd2sisl $dst, $src" %}
14571 ins_encode %{
14572 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14573 %}
14574 ins_pipe(pipe_slow);
14575 %}
14576
14577 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14578 %{
14579 predicate(VM_Version::supports_avx10_2());
14580 match(Set dst (ConvD2I (LoadD src)));
14581 format %{ "evcvttsd2sisl $dst, $src" %}
14582 ins_encode %{
14583 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14584 %}
14585 ins_pipe(pipe_slow);
14586 %}
14587
14588 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14589 %{
14590 predicate(!VM_Version::supports_avx10_2());
14591 match(Set dst (ConvD2L src));
14592 effect(KILL cr);
14593 format %{ "convert_d2l $dst, $src"%}
14594 ins_encode %{
14595 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14596 %}
14597 ins_pipe(pipe_slow);
14598 %}
14599
14600 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14601 %{
14602 predicate(VM_Version::supports_avx10_2());
14603 match(Set dst (ConvD2L src));
14604 format %{ "evcvttsd2sisq $dst, $src" %}
14605 ins_encode %{
14606 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14607 %}
14608 ins_pipe(pipe_slow);
14609 %}
14610
14611 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14612 %{
14613 predicate(VM_Version::supports_avx10_2());
14614 match(Set dst (ConvD2L (LoadD src)));
14615 format %{ "evcvttsd2sisq $dst, $src" %}
14616 ins_encode %{
14617 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14618 %}
14619 ins_pipe(pipe_slow);
14620 %}
14621
14622 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14623 %{
14624 match(Set dst (RoundD src));
14625 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14626 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14627 ins_encode %{
14628 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14629 %}
14630 ins_pipe(pipe_slow);
14631 %}
14632
14633 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14634 %{
14635 match(Set dst (RoundF src));
14636 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14637 format %{ "round_float $dst,$src" %}
14638 ins_encode %{
14639 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14640 %}
14641 ins_pipe(pipe_slow);
14642 %}
14643
14644 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14645 %{
14646 predicate(!UseXmmI2F);
14647 match(Set dst (ConvI2F src));
14648
14649 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14650 ins_encode %{
14651 if (UseAVX > 0) {
14652 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14653 }
14654 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14655 %}
14656 ins_pipe(pipe_slow); // XXX
14657 %}
14658
14659 instruct convI2F_reg_mem(regF dst, memory src)
14660 %{
14661 predicate(UseAVX == 0);
14662 match(Set dst (ConvI2F (LoadI src)));
14663
14664 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14665 ins_encode %{
14666 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14667 %}
14668 ins_pipe(pipe_slow); // XXX
14669 %}
14670
14671 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14672 %{
14673 predicate(!UseXmmI2D);
14674 match(Set dst (ConvI2D src));
14675
14676 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14677 ins_encode %{
14678 if (UseAVX > 0) {
14679 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14680 }
14681 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14682 %}
14683 ins_pipe(pipe_slow); // XXX
14684 %}
14685
14686 instruct convI2D_reg_mem(regD dst, memory src)
14687 %{
14688 predicate(UseAVX == 0);
14689 match(Set dst (ConvI2D (LoadI src)));
14690
14691 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14692 ins_encode %{
14693 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14694 %}
14695 ins_pipe(pipe_slow); // XXX
14696 %}
14697
14698 instruct convXI2F_reg(regF dst, rRegI src)
14699 %{
14700 predicate(UseXmmI2F);
14701 match(Set dst (ConvI2F src));
14702
14703 format %{ "movdl $dst, $src\n\t"
14704 "cvtdq2psl $dst, $dst\t# i2f" %}
14705 ins_encode %{
14706 __ movdl($dst$$XMMRegister, $src$$Register);
14707 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14708 %}
14709 ins_pipe(pipe_slow); // XXX
14710 %}
14711
14712 instruct convXI2D_reg(regD dst, rRegI src)
14713 %{
14714 predicate(UseXmmI2D);
14715 match(Set dst (ConvI2D src));
14716
14717 format %{ "movdl $dst, $src\n\t"
14718 "cvtdq2pdl $dst, $dst\t# i2d" %}
14719 ins_encode %{
14720 __ movdl($dst$$XMMRegister, $src$$Register);
14721 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14722 %}
14723 ins_pipe(pipe_slow); // XXX
14724 %}
14725
14726 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14727 %{
14728 match(Set dst (ConvL2F src));
14729
14730 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14731 ins_encode %{
14732 if (UseAVX > 0) {
14733 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14734 }
14735 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14736 %}
14737 ins_pipe(pipe_slow); // XXX
14738 %}
14739
14740 instruct convL2F_reg_mem(regF dst, memory src)
14741 %{
14742 predicate(UseAVX == 0);
14743 match(Set dst (ConvL2F (LoadL src)));
14744
14745 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14746 ins_encode %{
14747 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14748 %}
14749 ins_pipe(pipe_slow); // XXX
14750 %}
14751
14752 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14753 %{
14754 match(Set dst (ConvL2D src));
14755
14756 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14757 ins_encode %{
14758 if (UseAVX > 0) {
14759 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14760 }
14761 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14762 %}
14763 ins_pipe(pipe_slow); // XXX
14764 %}
14765
14766 instruct convL2D_reg_mem(regD dst, memory src)
14767 %{
14768 predicate(UseAVX == 0);
14769 match(Set dst (ConvL2D (LoadL src)));
14770
14771 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14772 ins_encode %{
14773 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14774 %}
14775 ins_pipe(pipe_slow); // XXX
14776 %}
14777
14778 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14779 %{
14780 match(Set dst (ConvI2L src));
14781
14782 ins_cost(125);
14783 format %{ "movslq $dst, $src\t# i2l" %}
14784 ins_encode %{
14785 __ movslq($dst$$Register, $src$$Register);
14786 %}
14787 ins_pipe(ialu_reg_reg);
14788 %}
14789
14790 // Zero-extend convert int to long
14791 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14792 %{
14793 match(Set dst (AndL (ConvI2L src) mask));
14794
14795 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14796 ins_encode %{
14797 if ($dst$$reg != $src$$reg) {
14798 __ movl($dst$$Register, $src$$Register);
14799 }
14800 %}
14801 ins_pipe(ialu_reg_reg);
14802 %}
14803
14804 // Zero-extend convert int to long
14805 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14806 %{
14807 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14808
14809 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
14810 ins_encode %{
14811 __ movl($dst$$Register, $src$$Address);
14812 %}
14813 ins_pipe(ialu_reg_mem);
14814 %}
14815
14816 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14817 %{
14818 match(Set dst (AndL src mask));
14819
14820 format %{ "movl $dst, $src\t# zero-extend long" %}
14821 ins_encode %{
14822 __ movl($dst$$Register, $src$$Register);
14823 %}
14824 ins_pipe(ialu_reg_reg);
14825 %}
14826
14827 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14828 %{
14829 match(Set dst (ConvL2I src));
14830
14831 format %{ "movl $dst, $src\t# l2i" %}
14832 ins_encode %{
14833 __ movl($dst$$Register, $src$$Register);
14834 %}
14835 ins_pipe(ialu_reg_reg);
14836 %}
14837
14838
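// The Move*2* nodes below reinterpret bits (Float.floatToRawIntBits and
// friends): no numeric conversion happens, so plain integer/XMM moves via a
// stack slot or a direct movd suffice.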
14839 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14840 match(Set dst (MoveF2I src));
14841 effect(DEF dst, USE src);
14842
14843 ins_cost(125);
14844 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14845 ins_encode %{
14846 __ movl($dst$$Register, Address(rsp, $src$$disp));
14847 %}
14848 ins_pipe(ialu_reg_mem);
14849 %}
14850
14851 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14852 match(Set dst (MoveI2F src));
14853 effect(DEF dst, USE src);
14854
14855 ins_cost(125);
14856 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14857 ins_encode %{
14858 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14859 %}
14860 ins_pipe(pipe_slow);
14861 %}
14862
14863 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14864 match(Set dst (MoveD2L src));
14865 effect(DEF dst, USE src);
14866
14867 ins_cost(125);
14868 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14869 ins_encode %{
14870 __ movq($dst$$Register, Address(rsp, $src$$disp));
14871 %}
14872 ins_pipe(ialu_reg_mem);
14873 %}
14874
14875 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14876 predicate(!UseXmmLoadAndClearUpper);
14877 match(Set dst (MoveL2D src));
14878 effect(DEF dst, USE src);
14879
14880 ins_cost(125);
14881 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14882 ins_encode %{
14883 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14884 %}
14885 ins_pipe(pipe_slow);
14886 %}
14887
14888 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14889 predicate(UseXmmLoadAndClearUpper);
14890 match(Set dst (MoveL2D src));
14891 effect(DEF dst, USE src);
14892
14893 ins_cost(125);
14894 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14895 ins_encode %{
14896 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14897 %}
14898 ins_pipe(pipe_slow);
14899 %}
14900
14901
14902 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14903 match(Set dst (MoveF2I src));
14904 effect(DEF dst, USE src);
14905
14906 ins_cost(95); // XXX
14907 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14908 ins_encode %{
14909 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14910 %}
14911 ins_pipe(pipe_slow);
14912 %}
14913
14914 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14915 match(Set dst (MoveI2F src));
14916 effect(DEF dst, USE src);
14917
14918 ins_cost(100);
14919 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
14920 ins_encode %{
14921 __ movl(Address(rsp, $dst$$disp), $src$$Register);
14922 %}
14923 ins_pipe( ialu_mem_reg );
14924 %}
14925
14926 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14927 match(Set dst (MoveD2L src));
14928 effect(DEF dst, USE src);
14929
14930 ins_cost(95); // XXX
14931 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
14932 ins_encode %{
14933 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14934 %}
14935 ins_pipe(pipe_slow);
14936 %}
14937
14938 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14939 match(Set dst (MoveL2D src));
14940 effect(DEF dst, USE src);
14941
14942 ins_cost(100);
14943 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
14944 ins_encode %{
14945 __ movq(Address(rsp, $dst$$disp), $src$$Register);
14946 %}
14947 ins_pipe(ialu_mem_reg);
14948 %}
14949
14950 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14951 match(Set dst (MoveF2I src));
14952 effect(DEF dst, USE src);
14953 ins_cost(85);
14954 format %{ "movd $dst,$src\t# MoveF2I" %}
14955 ins_encode %{
14956 __ movdl($dst$$Register, $src$$XMMRegister);
14957 %}
14958 ins_pipe( pipe_slow );
14959 %}
14960
14961 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14962 match(Set dst (MoveD2L src));
14963 effect(DEF dst, USE src);
14964 ins_cost(85);
14965 format %{ "movd $dst,$src\t# MoveD2L" %}
14966 ins_encode %{
14967 __ movdq($dst$$Register, $src$$XMMRegister);
14968 %}
14969 ins_pipe( pipe_slow );
14970 %}
14971
14972 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14973 match(Set dst (MoveI2F src));
14974 effect(DEF dst, USE src);
14975 ins_cost(100);
14976 format %{ "movd $dst,$src\t# MoveI2F" %}
14977 ins_encode %{
14978 __ movdl($dst$$XMMRegister, $src$$Register);
14979 %}
14980 ins_pipe( pipe_slow );
14981 %}
14982
14983 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14984 match(Set dst (MoveL2D src));
14985 effect(DEF dst, USE src);
14986 ins_cost(100);
14987 format %{ "movd $dst,$src\t# MoveL2D" %}
14988 ins_encode %{
14989 __ movdq($dst$$XMMRegister, $src$$Register);
14990 %}
14991 ins_pipe( pipe_slow );
14992 %}
14993
14994 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
14996 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14997 Universe dummy, rFlagsReg cr)
14998 %{
14999 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15000 match(Set dummy (ClearArray cnt base));
15001 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15002
15003 format %{ $$template
15004 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15005 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15006 $$emit$$"jg LARGE\n\t"
15007 $$emit$$"dec rcx\n\t"
15008 $$emit$$"js DONE\t# Zero length\n\t"
15009 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15010 $$emit$$"dec rcx\n\t"
15011 $$emit$$"jge LOOP\n\t"
15012 $$emit$$"jmp DONE\n\t"
15013 $$emit$$"# LARGE:\n\t"
15014 if (UseFastStosb) {
15015 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15016 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15017 } else if (UseXMMForObjInit) {
15018 $$emit$$"mov rdi,rax\n\t"
15019 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15020 $$emit$$"jmpq L_zero_64_bytes\n\t"
15021 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15022 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15023 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15024 $$emit$$"add 0x40,rax\n\t"
15025 $$emit$$"# L_zero_64_bytes:\n\t"
15026 $$emit$$"sub 0x8,rcx\n\t"
15027 $$emit$$"jge L_loop\n\t"
15028 $$emit$$"add 0x4,rcx\n\t"
15029 $$emit$$"jl L_tail\n\t"
15030 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15031 $$emit$$"add 0x20,rax\n\t"
15032 $$emit$$"sub 0x4,rcx\n\t"
15033 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15034 $$emit$$"add 0x4,rcx\n\t"
15035 $$emit$$"jle L_end\n\t"
15036 $$emit$$"dec rcx\n\t"
15037 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15038 $$emit$$"vmovq xmm0,(rax)\n\t"
15039 $$emit$$"add 0x8,rax\n\t"
15040 $$emit$$"dec rcx\n\t"
15041 $$emit$$"jge L_sloop\n\t"
15042 $$emit$$"# L_end:\n\t"
15043 } else {
15044 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15045 }
15046 $$emit$$"# DONE"
15047 %}
15048 ins_encode %{
15049 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15050 $tmp$$XMMRegister, false, knoreg);
15051 %}
15052 ins_pipe(pipe_slow);
15053 %}
15054
15055 // Small non-constant length ClearArray for AVX512 targets.
15056 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15057 Universe dummy, rFlagsReg cr)
15058 %{
15059 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15060 match(Set dummy (ClearArray cnt base));
15061 ins_cost(125);
15062 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15063
15064 format %{ $$template
15065 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15066 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15067 $$emit$$"jg LARGE\n\t"
15068 $$emit$$"dec rcx\n\t"
15069 $$emit$$"js DONE\t# Zero length\n\t"
15070 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15071 $$emit$$"dec rcx\n\t"
15072 $$emit$$"jge LOOP\n\t"
15073 $$emit$$"jmp DONE\n\t"
15074 $$emit$$"# LARGE:\n\t"
15075 if (UseFastStosb) {
15076 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15077 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15078 } else if (UseXMMForObjInit) {
15079 $$emit$$"mov rdi,rax\n\t"
15080 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15081 $$emit$$"jmpq L_zero_64_bytes\n\t"
15082 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15083 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15084 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15085 $$emit$$"add 0x40,rax\n\t"
15086 $$emit$$"# L_zero_64_bytes:\n\t"
15087 $$emit$$"sub 0x8,rcx\n\t"
15088 $$emit$$"jge L_loop\n\t"
15089 $$emit$$"add 0x4,rcx\n\t"
15090 $$emit$$"jl L_tail\n\t"
15091 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15092 $$emit$$"add 0x20,rax\n\t"
15093 $$emit$$"sub 0x4,rcx\n\t"
15094 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15095 $$emit$$"add 0x4,rcx\n\t"
15096 $$emit$$"jle L_end\n\t"
15097 $$emit$$"dec rcx\n\t"
15098 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15099 $$emit$$"vmovq xmm0,(rax)\n\t"
15100 $$emit$$"add 0x8,rax\n\t"
15101 $$emit$$"dec rcx\n\t"
15102 $$emit$$"jge L_sloop\n\t"
15103 $$emit$$"# L_end:\n\t"
15104 } else {
15105 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15106 }
15107 $$emit$$"# DONE"
15108 %}
15109 ins_encode %{
15110 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15111 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15112 %}
15113 ins_pipe(pipe_slow);
15114 %}
15115
15116 // Large non-constant length ClearArray for non-AVX512 targets.
15117 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15118 Universe dummy, rFlagsReg cr)
15119 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15121 match(Set dummy (ClearArray cnt base));
15122 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15123
15124 format %{ $$template
15125 if (UseFastStosb) {
15126 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15127 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15128 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15129 } else if (UseXMMForObjInit) {
15130 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15131 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15132 $$emit$$"jmpq L_zero_64_bytes\n\t"
15133 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15134 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15135 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15136 $$emit$$"add 0x40,rax\n\t"
15137 $$emit$$"# L_zero_64_bytes:\n\t"
15138 $$emit$$"sub 0x8,rcx\n\t"
15139 $$emit$$"jge L_loop\n\t"
15140 $$emit$$"add 0x4,rcx\n\t"
15141 $$emit$$"jl L_tail\n\t"
15142 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15143 $$emit$$"add 0x20,rax\n\t"
15144 $$emit$$"sub 0x4,rcx\n\t"
15145 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15146 $$emit$$"add 0x4,rcx\n\t"
15147 $$emit$$"jle L_end\n\t"
15148 $$emit$$"dec rcx\n\t"
15149 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15150 $$emit$$"vmovq xmm0,(rax)\n\t"
15151 $$emit$$"add 0x8,rax\n\t"
15152 $$emit$$"dec rcx\n\t"
15153 $$emit$$"jge L_sloop\n\t"
15154 $$emit$$"# L_end:\n\t"
15155 } else {
15156 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15157 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15158 }
15159 %}
15160 ins_encode %{
15161 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15162 $tmp$$XMMRegister, true, knoreg);
15163 %}
15164 ins_pipe(pipe_slow);
15165 %}
15166
15167 // Large non-constant length ClearArray for AVX512 targets.
15168 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15169 Universe dummy, rFlagsReg cr)
15170 %{
15171 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15172 match(Set dummy (ClearArray cnt base));
15173 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15174
15175 format %{ $$template
15176 if (UseFastStosb) {
15177 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15178 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15179 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15180 } else if (UseXMMForObjInit) {
15181 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15182 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15183 $$emit$$"jmpq L_zero_64_bytes\n\t"
15184 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15185 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15186 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15187 $$emit$$"add 0x40,rax\n\t"
15188 $$emit$$"# L_zero_64_bytes:\n\t"
15189 $$emit$$"sub 0x8,rcx\n\t"
15190 $$emit$$"jge L_loop\n\t"
15191 $$emit$$"add 0x4,rcx\n\t"
15192 $$emit$$"jl L_tail\n\t"
15193 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15194 $$emit$$"add 0x20,rax\n\t"
15195 $$emit$$"sub 0x4,rcx\n\t"
15196 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15197 $$emit$$"add 0x4,rcx\n\t"
15198 $$emit$$"jle L_end\n\t"
15199 $$emit$$"dec rcx\n\t"
15200 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15201 $$emit$$"vmovq xmm0,(rax)\n\t"
15202 $$emit$$"add 0x8,rax\n\t"
15203 $$emit$$"dec rcx\n\t"
15204 $$emit$$"jge L_sloop\n\t"
15205 $$emit$$"# L_end:\n\t"
15206 } else {
15207 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15208 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15209 }
15210 %}
15211 ins_encode %{
15212 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15213 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15214 %}
15215 ins_pipe(pipe_slow);
15216 %}
15217
15218 // Small constant length ClearArray for AVX512 targets.
15219 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15220 %{
15221 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15222 match(Set dummy (ClearArray cnt base));
15223 ins_cost(100);
15224 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15225 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15226 ins_encode %{
15227 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15228 %}
15229 ins_pipe(pipe_slow);
15230 %}
15231
15232 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15233 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15234 %{
15235 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15236 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15237 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15238
15239 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15240 ins_encode %{
15241 __ string_compare($str1$$Register, $str2$$Register,
15242 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15243 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15244 %}
15245 ins_pipe( pipe_slow );
15246 %}
15247
15248 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15249 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15250 %{
15251 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15252 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15253 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15254
15255 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15256 ins_encode %{
15257 __ string_compare($str1$$Register, $str2$$Register,
15258 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15259 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15260 %}
15261 ins_pipe( pipe_slow );
15262 %}
15263
15264 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15265 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15266 %{
15267 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15268 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15269 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15270
15271 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15272 ins_encode %{
15273 __ string_compare($str1$$Register, $str2$$Register,
15274 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15275 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15276 %}
15277 ins_pipe( pipe_slow );
15278 %}
15279
15280 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15281 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15282 %{
15283 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15284 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15285 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15286
15287 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15288 ins_encode %{
15289 __ string_compare($str1$$Register, $str2$$Register,
15290 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15291 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15292 %}
15293 ins_pipe( pipe_slow );
15294 %}
15295
15296 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15297 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15298 %{
15299 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15300 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15301 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15302
15303 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15304 ins_encode %{
15305 __ string_compare($str1$$Register, $str2$$Register,
15306 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15307 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15308 %}
15309 ins_pipe( pipe_slow );
15310 %}
15311
15312 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15313 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15314 %{
15315 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15316 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15317 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15318
15319 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15320 ins_encode %{
15321 __ string_compare($str1$$Register, $str2$$Register,
15322 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15323 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15324 %}
15325 ins_pipe( pipe_slow );
15326 %}
15327
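// Note: for the UL encoding the two strings are pinned to the opposite
// registers and passed to string_compare() swapped (str2/cnt2 first); the
// macro assembler handles the byte/char width asymmetry internally.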
15328 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15329 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15330 %{
15331 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15332 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15333 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15334
15335 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15336 ins_encode %{
15337 __ string_compare($str2$$Register, $str1$$Register,
15338 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15339 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15340 %}
15341 ins_pipe( pipe_slow );
15342 %}
15343
15344 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15345 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15346 %{
15347 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15348 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15349 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15350
15351 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15352 ins_encode %{
15353 __ string_compare($str2$$Register, $str1$$Register,
15354 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15355 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15356 %}
15357 ins_pipe( pipe_slow );
15358 %}
15359
// Fast search of substring with known size.
15361 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15362 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15363 %{
15364 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15365 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15366 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15367
15368 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15369 ins_encode %{
15370 int icnt2 = (int)$int_cnt2$$constant;
15371 if (icnt2 >= 16) {
15372 // IndexOf for constant substrings with size >= 16 elements
15373 // which don't need to be loaded through stack.
15374 __ string_indexofC8($str1$$Register, $str2$$Register,
15375 $cnt1$$Register, $cnt2$$Register,
15376 icnt2, $result$$Register,
15377 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15378 } else {
15379 // Small strings are loaded through stack if they cross page boundary.
15380 __ string_indexof($str1$$Register, $str2$$Register,
15381 $cnt1$$Register, $cnt2$$Register,
15382 icnt2, $result$$Register,
15383 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15384 }
15385 %}
15386 ins_pipe( pipe_slow );
15387 %}
15388
// Fast search of substring with known size.
15390 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15391 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15392 %{
15393 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15394 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15395 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15396
15397 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15398 ins_encode %{
15399 int icnt2 = (int)$int_cnt2$$constant;
15400 if (icnt2 >= 8) {
15401 // IndexOf for constant substrings with size >= 8 elements
15402 // which don't need to be loaded through stack.
15403 __ string_indexofC8($str1$$Register, $str2$$Register,
15404 $cnt1$$Register, $cnt2$$Register,
15405 icnt2, $result$$Register,
15406 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15407 } else {
15408 // Small strings are loaded through stack if they cross page boundary.
15409 __ string_indexof($str1$$Register, $str2$$Register,
15410 $cnt1$$Register, $cnt2$$Register,
15411 icnt2, $result$$Register,
15412 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15413 }
15414 %}
15415 ins_pipe( pipe_slow );
15416 %}
15417
// Fast search of substring with known size.
15419 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15420 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15421 %{
15422 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15423 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15424 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15425
15426 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15427 ins_encode %{
15428 int icnt2 = (int)$int_cnt2$$constant;
15429 if (icnt2 >= 8) {
15430 // IndexOf for constant substrings with size >= 8 elements
15431 // which don't need to be loaded through stack.
15432 __ string_indexofC8($str1$$Register, $str2$$Register,
15433 $cnt1$$Register, $cnt2$$Register,
15434 icnt2, $result$$Register,
15435 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15436 } else {
15437 // Small strings are loaded through stack if they cross page boundary.
15438 __ string_indexof($str1$$Register, $str2$$Register,
15439 $cnt1$$Register, $cnt2$$Register,
15440 icnt2, $result$$Register,
15441 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15442 }
15443 %}
15444 ins_pipe( pipe_slow );
15445 %}
15446
15447 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15448 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15449 %{
15450 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15451 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15452 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15453
15454 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15455 ins_encode %{
15456 __ string_indexof($str1$$Register, $str2$$Register,
15457 $cnt1$$Register, $cnt2$$Register,
15458 (-1), $result$$Register,
15459 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15460 %}
15461 ins_pipe( pipe_slow );
15462 %}
15463
15464 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15465 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15466 %{
15467 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15468 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15469 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15470
15471 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15472 ins_encode %{
15473 __ string_indexof($str1$$Register, $str2$$Register,
15474 $cnt1$$Register, $cnt2$$Register,
15475 (-1), $result$$Register,
15476 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15477 %}
15478 ins_pipe( pipe_slow );
15479 %}
15480
15481 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15482 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15483 %{
15484 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15485 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15486 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15487
15488 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15489 ins_encode %{
15490 __ string_indexof($str1$$Register, $str2$$Register,
15491 $cnt1$$Register, $cnt2$$Register,
15492 (-1), $result$$Register,
15493 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15494 %}
15495 ins_pipe( pipe_slow );
15496 %}
15497
15498 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15499 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15500 %{
15501 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15502 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15503 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15504 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15505 ins_encode %{
15506 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15507 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15508 %}
15509 ins_pipe( pipe_slow );
15510 %}
15511
15512 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15513 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15514 %{
15515 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15516 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15517 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15518 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15519 ins_encode %{
15520 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15521 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15522 %}
15523 ins_pipe( pipe_slow );
15524 %}
15525
15526 // fast string equals
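// Two flavors follow: the generic SSE/AVX version passes knoreg (no opmask),
// while the _evex version, selected when AVX-512 VL+BW are available, uses a
// kReg opmask for the masked tail comparison.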
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Overflow Math Instructions-----------------------------------------

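// These nodes typically come from the overflow-checking math intrinsics
// (e.g. java.lang.Math.addExact/subtractExact/negateExact/multiplyExact):
// the arithmetic instruction sets OF and the matched If tests it, roughly:
//
//   int r = Math.addExact(a, b);   // addl a, b; jo -> deoptimize/throw path
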
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// XXX more variants!!
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an rFlagsRegU instead of rFlagsReg.
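// The distinct flags class only changes what consumers emit: the cmpl itself
// is identical, but users of rFlagsRegU select unsigned condition codes
// (jb/jae/ja/jbe) where rFlagsReg users select signed ones (jl/jge/jg/jle).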
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Manifest a CmpU result in an integer register. Very painful.
// This is the test to avoid.
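// The sequence below computes, in effect:
//   dst = (src1 < src2) ? -1 : (src1 == src2) ? 0 : 1   // unsigned compare
// -1 is written unconditionally, the jb keeps it when below, and
// setcc(notZero) then yields 0 on equal and 1 on above.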
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpUL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

//----------Max and Min--------------------------------------------------------
// Min Instructions
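// x86 has no scalar integer min/max instruction, so MinI/MaxI expand into a
// compare followed by a conditional move; e.g. for min, cmovgt replaces dst
// with src exactly when dst > src.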

instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}

instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}

// ============================================================================
// Branch Instructions

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
// superklass array for an instance of the superklass. Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.

instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}

// ============================================================================
// Two versions of hashtable-based partialSubtypeCheck, both used when
// we need to search for a super class in the secondary supers array.
// The first is used when we don't know _a priori_ the class being
// searched for. The second, far more common, is used when we do know:
// this is used for instanceof, checkcast, and any case where C2 can
// determine it by constant propagation.
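//
// Illustratively (Java-level shapes, not taken from this file):
//
//   obj instanceof SomeConstantClass  // super known statically ->
//                                     //   partialSubtypeCheckConstSuper
//   cls.isInstance(obj)               // super only known at run time ->
//                                     //   partialSubtypeCheckVarSuper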

instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}

instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700); // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
    } else {
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    }
  %}

  ins_pipe(pipe_slow);
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler decides whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
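//
// On x86-64 the savings are concrete: jmp rel32 encodes in 5 bytes and jcc
// rel32 in 6 (matching the size(5)/size(6) attributes above), while the rel8
// short forms emitted below via jmpb/jccb are 2 bytes each.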

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// inlined locking and unlocking
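// Roughly speaking, FastLock/FastUnlock are the inline fast paths of
// monitorenter/monitorexit for synchronized code; the flags result feeds a
// branch that falls back to the runtime slow path when the fast path fails.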

instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}


// ============================================================================
// Safepoint Instructions
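// The poll works by reading the thread-local polling page: $poll holds its
// address, and when the VM arms the page for a safepoint or handshake the
// load below faults, diverting this thread into the VM (see
// SafepointMechanism for the details).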
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
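// A minimal sketch of why the alignment matters, assuming the usual x86
// patching rules: ins_alignment(4) below keeps the call's 32-bit displacement
// 4-byte aligned so it can be patched atomically while other threads may be
// executing the call.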
17008 instruct CallStaticJavaDirect(method meth) %{
17009 match(CallStaticJava);
17010 effect(USE meth);
17011
17012 ins_cost(300);
17013 format %{ "call,static " %}
17014 opcode(0xE8); /* E8 cd */
17015 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17016 ins_pipe(pipe_slow);
17017 ins_alignment(4);
17018 %}
17019
17020 // Call Java Dynamic Instruction
17021 // Note: If this code changes, the corresponding ret_addr_offset() and
17022 // compute_padding() functions will have to be adjusted.
17023 instruct CallDynamicJavaDirect(method meth)
17024 %{
17025 match(CallDynamicJava);
17026 effect(USE meth);
17027
17028 ins_cost(300);
17029 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17030 "call,dynamic " %}
17031 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17032 ins_pipe(pipe_slow);
17033 ins_alignment(4);
17034 %}
17035
17036 // Call Runtime Instruction
17037 instruct CallRuntimeDirect(method meth)
17038 %{
17039 match(CallRuntime);
17040 effect(USE meth);
17041
17042 ins_cost(300);
17043 format %{ "call,runtime " %}
17044 ins_encode(clear_avx, Java_To_Runtime(meth));
17045 ins_pipe(pipe_slow);
17046 %}
17047
17048 // Call runtime without safepoint
17049 instruct CallLeafDirect(method meth)
17050 %{
17051 match(CallLeaf);
17052 effect(USE meth);
17053
17054 ins_cost(300);
17055 format %{ "call_leaf,runtime " %}
17056 ins_encode(clear_avx, Java_To_Runtime(meth));
17057 ins_pipe(pipe_slow);
17058 %}
17059
17060 // Call runtime without safepoint and with vector arguments
17061 instruct CallLeafDirectVector(method meth)
17062 %{
17063 match(CallLeafVector);
17064 effect(USE meth);
17065
17066 ins_cost(300);
17067 format %{ "call_leaf,vector " %}
17068 ins_encode(Java_To_Runtime(meth));
17069 ins_pipe(pipe_slow);
17070 %}
17071
17072 // Call runtime without safepoint
17073 instruct CallLeafNoFPDirect(method meth)
17074 %{
17075 match(CallLeafNoFP);
17076 effect(USE meth);
17077
17078 ins_cost(300);
17079 format %{ "call_leaf_nofp,runtime " %}
17080 ins_encode(clear_avx, Java_To_Runtime(meth));
17081 ins_pipe(pipe_slow);
17082 %}
17083
17084 // Return Instruction
17085 // Remove the return address & jump to it.
17086 // Notice: We always emit a nop after a ret to make sure there is room
17087 // for safepoint patching
17088 instruct Ret()
17089 %{
17090 match(Return);
17091
17092 format %{ "ret" %}
17093 ins_encode %{
17094 __ ret(0);
17095 %}
17096 ins_pipe(pipe_jmp);
17097 %}
17098
17099 // Tail Call; Jump from runtime stub to Java code.
17100 // Also known as an 'interprocedural jump'.
17101 // Target of jump will eventually return to caller.
17102 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller's state.
17105 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17106 %{
17107 match(TailCall jump_target method_ptr);
17108
17109 ins_cost(300);
17110 format %{ "jmp $jump_target\t# rbx holds method" %}
17111 ins_encode %{
17112 __ jmp($jump_target$$Register);
17113 %}
17114 ins_pipe(pipe_jmp);
17115 %}
17116
17117 // Tail Jump; remove the return address; jump to target.
17118 // TailCall above leaves the return address around.
17119 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17120 %{
17121 match(TailJump jump_target ex_oop);
17122
17123 ins_cost(300);
17124 format %{ "popq rdx\t# pop return address\n\t"
17125 "jmp $jump_target" %}
17126 ins_encode %{
17127 __ popq(as_Register(RDX_enc));
17128 __ jmp($jump_target$$Register);
17129 %}
17130 ins_pipe(pipe_jmp);
17131 %}
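
// By HotSpot's rethrow convention on x86_64, the exception oop arrives in rax
// (ex_oop above) and the popped return address (the exception pc) is left in
// rdx for the handler.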
17132
17133 // Forward exception.
17134 instruct ForwardExceptionjmp()
17135 %{
17136 match(ForwardException);
17137
17138 format %{ "jmp forward_exception_stub" %}
17139 ins_encode %{
17140 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17141 %}
17142 ins_pipe(pipe_jmp);
17143 %}
17144
// Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up
// just prior to jumping to this handler. No code is emitted.
17148 instruct CreateException(rax_RegP ex_oop)
17149 %{
17150 match(Set ex_oop (CreateEx));
17151
17152 size(0);
17153 // use the following format syntax
17154 format %{ "# exception oop is in rax; no code emitted" %}
17155 ins_encode();
17156 ins_pipe(empty);
17157 %}
17158
17159 // Rethrow exception:
17160 // The exception oop will come in the first argument position.
17161 // Then JUMP (not call) to the rethrow stub code.
17162 instruct RethrowException()
17163 %{
17164 match(Rethrow);
17165
17166 // use the following format syntax
17167 format %{ "jmp rethrow_stub" %}
17168 ins_encode %{
17169 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17170 %}
17171 ins_pipe(pipe_jmp);
17172 %}
17173
17174 // ============================================================================
17175 // This name is KNOWN by the ADLC and cannot be changed.
17176 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17177 // for this guy.
17178 instruct tlsLoadP(r15_RegP dst) %{
17179 match(Set dst (ThreadLocal));
17180 effect(DEF dst);
17181
17182 size(0);
17183 format %{ "# TLS is in R15" %}
17184 ins_encode( /*empty encoding*/ );
17185 ins_pipe(ialu_reg_reg);
17186 %}
17187
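// Scalar float/double arithmetic.
// The UseAVX == 0 rules below match the two-operand SSE forms, which are
// destructive (dst = dst OP src), so the node is matched with dst as both
// input and output. The UseAVX > 0 rules use the non-destructive
// three-operand VEX forms (dst = src1 OP src2), giving the register
// allocator more freedom.
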
17188 instruct addF_reg(regF dst, regF src) %{
17189 predicate(UseAVX == 0);
17190 match(Set dst (AddF dst src));
17191
17192 format %{ "addss $dst, $src" %}
17193 ins_cost(150);
17194 ins_encode %{
17195 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17196 %}
17197 ins_pipe(pipe_slow);
17198 %}
17199
17200 instruct addF_mem(regF dst, memory src) %{
17201 predicate(UseAVX == 0);
17202 match(Set dst (AddF dst (LoadF src)));
17203
17204 format %{ "addss $dst, $src" %}
17205 ins_cost(150);
17206 ins_encode %{
17207 __ addss($dst$$XMMRegister, $src$$Address);
17208 %}
17209 ins_pipe(pipe_slow);
17210 %}
17211
17212 instruct addF_imm(regF dst, immF con) %{
17213 predicate(UseAVX == 0);
17214 match(Set dst (AddF dst con));
17215 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17216 ins_cost(150);
17217 ins_encode %{
17218 __ addss($dst$$XMMRegister, $constantaddress($con));
17219 %}
17220 ins_pipe(pipe_slow);
17221 %}
17222
17223 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17224 predicate(UseAVX > 0);
17225 match(Set dst (AddF src1 src2));
17226
17227 format %{ "vaddss $dst, $src1, $src2" %}
17228 ins_cost(150);
17229 ins_encode %{
17230 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17231 %}
17232 ins_pipe(pipe_slow);
17233 %}
17234
17235 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17236 predicate(UseAVX > 0);
17237 match(Set dst (AddF src1 (LoadF src2)));
17238
17239 format %{ "vaddss $dst, $src1, $src2" %}
17240 ins_cost(150);
17241 ins_encode %{
17242 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17243 %}
17244 ins_pipe(pipe_slow);
17245 %}
17246
17247 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17248 predicate(UseAVX > 0);
17249 match(Set dst (AddF src con));
17250
17251 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17252 ins_cost(150);
17253 ins_encode %{
17254 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17255 %}
17256 ins_pipe(pipe_slow);
17257 %}
17258
17259 instruct addD_reg(regD dst, regD src) %{
17260 predicate(UseAVX == 0);
17261 match(Set dst (AddD dst src));
17262
17263 format %{ "addsd $dst, $src" %}
17264 ins_cost(150);
17265 ins_encode %{
17266 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17267 %}
17268 ins_pipe(pipe_slow);
17269 %}
17270
17271 instruct addD_mem(regD dst, memory src) %{
17272 predicate(UseAVX == 0);
17273 match(Set dst (AddD dst (LoadD src)));
17274
17275 format %{ "addsd $dst, $src" %}
17276 ins_cost(150);
17277 ins_encode %{
17278 __ addsd($dst$$XMMRegister, $src$$Address);
17279 %}
17280 ins_pipe(pipe_slow);
17281 %}
17282
17283 instruct addD_imm(regD dst, immD con) %{
17284 predicate(UseAVX == 0);
17285 match(Set dst (AddD dst con));
17286 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17287 ins_cost(150);
17288 ins_encode %{
17289 __ addsd($dst$$XMMRegister, $constantaddress($con));
17290 %}
17291 ins_pipe(pipe_slow);
17292 %}
17293
17294 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17295 predicate(UseAVX > 0);
17296 match(Set dst (AddD src1 src2));
17297
17298 format %{ "vaddsd $dst, $src1, $src2" %}
17299 ins_cost(150);
17300 ins_encode %{
17301 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17302 %}
17303 ins_pipe(pipe_slow);
17304 %}
17305
17306 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17307 predicate(UseAVX > 0);
17308 match(Set dst (AddD src1 (LoadD src2)));
17309
17310 format %{ "vaddsd $dst, $src1, $src2" %}
17311 ins_cost(150);
17312 ins_encode %{
17313 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17314 %}
17315 ins_pipe(pipe_slow);
17316 %}
17317
17318 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17319 predicate(UseAVX > 0);
17320 match(Set dst (AddD src con));
17321
17322 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17323 ins_cost(150);
17324 ins_encode %{
17325 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17326 %}
17327 ins_pipe(pipe_slow);
17328 %}
17329
17330 instruct subF_reg(regF dst, regF src) %{
17331 predicate(UseAVX == 0);
17332 match(Set dst (SubF dst src));
17333
17334 format %{ "subss $dst, $src" %}
17335 ins_cost(150);
17336 ins_encode %{
17337 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17338 %}
17339 ins_pipe(pipe_slow);
17340 %}
17341
17342 instruct subF_mem(regF dst, memory src) %{
17343 predicate(UseAVX == 0);
17344 match(Set dst (SubF dst (LoadF src)));
17345
17346 format %{ "subss $dst, $src" %}
17347 ins_cost(150);
17348 ins_encode %{
17349 __ subss($dst$$XMMRegister, $src$$Address);
17350 %}
17351 ins_pipe(pipe_slow);
17352 %}
17353
17354 instruct subF_imm(regF dst, immF con) %{
17355 predicate(UseAVX == 0);
17356 match(Set dst (SubF dst con));
17357 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17358 ins_cost(150);
17359 ins_encode %{
17360 __ subss($dst$$XMMRegister, $constantaddress($con));
17361 %}
17362 ins_pipe(pipe_slow);
17363 %}
17364
17365 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17366 predicate(UseAVX > 0);
17367 match(Set dst (SubF src1 src2));
17368
17369 format %{ "vsubss $dst, $src1, $src2" %}
17370 ins_cost(150);
17371 ins_encode %{
17372 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17373 %}
17374 ins_pipe(pipe_slow);
17375 %}
17376
17377 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17378 predicate(UseAVX > 0);
17379 match(Set dst (SubF src1 (LoadF src2)));
17380
17381 format %{ "vsubss $dst, $src1, $src2" %}
17382 ins_cost(150);
17383 ins_encode %{
17384 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17385 %}
17386 ins_pipe(pipe_slow);
17387 %}
17388
17389 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17390 predicate(UseAVX > 0);
17391 match(Set dst (SubF src con));
17392
17393 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17394 ins_cost(150);
17395 ins_encode %{
17396 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17397 %}
17398 ins_pipe(pipe_slow);
17399 %}
17400
17401 instruct subD_reg(regD dst, regD src) %{
17402 predicate(UseAVX == 0);
17403 match(Set dst (SubD dst src));
17404
17405 format %{ "subsd $dst, $src" %}
17406 ins_cost(150);
17407 ins_encode %{
17408 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17409 %}
17410 ins_pipe(pipe_slow);
17411 %}
17412
17413 instruct subD_mem(regD dst, memory src) %{
17414 predicate(UseAVX == 0);
17415 match(Set dst (SubD dst (LoadD src)));
17416
17417 format %{ "subsd $dst, $src" %}
17418 ins_cost(150);
17419 ins_encode %{
17420 __ subsd($dst$$XMMRegister, $src$$Address);
17421 %}
17422 ins_pipe(pipe_slow);
17423 %}
17424
17425 instruct subD_imm(regD dst, immD con) %{
17426 predicate(UseAVX == 0);
17427 match(Set dst (SubD dst con));
17428 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17429 ins_cost(150);
17430 ins_encode %{
17431 __ subsd($dst$$XMMRegister, $constantaddress($con));
17432 %}
17433 ins_pipe(pipe_slow);
17434 %}
17435
17436 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17437 predicate(UseAVX > 0);
17438 match(Set dst (SubD src1 src2));
17439
17440 format %{ "vsubsd $dst, $src1, $src2" %}
17441 ins_cost(150);
17442 ins_encode %{
17443 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17444 %}
17445 ins_pipe(pipe_slow);
17446 %}
17447
17448 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17449 predicate(UseAVX > 0);
17450 match(Set dst (SubD src1 (LoadD src2)));
17451
17452 format %{ "vsubsd $dst, $src1, $src2" %}
17453 ins_cost(150);
17454 ins_encode %{
17455 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17456 %}
17457 ins_pipe(pipe_slow);
17458 %}
17459
17460 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17461 predicate(UseAVX > 0);
17462 match(Set dst (SubD src con));
17463
17464 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17465 ins_cost(150);
17466 ins_encode %{
17467 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17468 %}
17469 ins_pipe(pipe_slow);
17470 %}
17471
17472 instruct mulF_reg(regF dst, regF src) %{
17473 predicate(UseAVX == 0);
17474 match(Set dst (MulF dst src));
17475
17476 format %{ "mulss $dst, $src" %}
17477 ins_cost(150);
17478 ins_encode %{
17479 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17480 %}
17481 ins_pipe(pipe_slow);
17482 %}
17483
17484 instruct mulF_mem(regF dst, memory src) %{
17485 predicate(UseAVX == 0);
17486 match(Set dst (MulF dst (LoadF src)));
17487
17488 format %{ "mulss $dst, $src" %}
17489 ins_cost(150);
17490 ins_encode %{
17491 __ mulss($dst$$XMMRegister, $src$$Address);
17492 %}
17493 ins_pipe(pipe_slow);
17494 %}
17495
17496 instruct mulF_imm(regF dst, immF con) %{
17497 predicate(UseAVX == 0);
17498 match(Set dst (MulF dst con));
17499 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17500 ins_cost(150);
17501 ins_encode %{
17502 __ mulss($dst$$XMMRegister, $constantaddress($con));
17503 %}
17504 ins_pipe(pipe_slow);
17505 %}
17506
17507 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17508 predicate(UseAVX > 0);
17509 match(Set dst (MulF src1 src2));
17510
17511 format %{ "vmulss $dst, $src1, $src2" %}
17512 ins_cost(150);
17513 ins_encode %{
17514 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17515 %}
17516 ins_pipe(pipe_slow);
17517 %}
17518
17519 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17520 predicate(UseAVX > 0);
17521 match(Set dst (MulF src1 (LoadF src2)));
17522
17523 format %{ "vmulss $dst, $src1, $src2" %}
17524 ins_cost(150);
17525 ins_encode %{
17526 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17527 %}
17528 ins_pipe(pipe_slow);
17529 %}
17530
17531 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17532 predicate(UseAVX > 0);
17533 match(Set dst (MulF src con));
17534
17535 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17536 ins_cost(150);
17537 ins_encode %{
17538 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17539 %}
17540 ins_pipe(pipe_slow);
17541 %}
17542
17543 instruct mulD_reg(regD dst, regD src) %{
17544 predicate(UseAVX == 0);
17545 match(Set dst (MulD dst src));
17546
17547 format %{ "mulsd $dst, $src" %}
17548 ins_cost(150);
17549 ins_encode %{
17550 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17551 %}
17552 ins_pipe(pipe_slow);
17553 %}
17554
17555 instruct mulD_mem(regD dst, memory src) %{
17556 predicate(UseAVX == 0);
17557 match(Set dst (MulD dst (LoadD src)));
17558
17559 format %{ "mulsd $dst, $src" %}
17560 ins_cost(150);
17561 ins_encode %{
17562 __ mulsd($dst$$XMMRegister, $src$$Address);
17563 %}
17564 ins_pipe(pipe_slow);
17565 %}
17566
17567 instruct mulD_imm(regD dst, immD con) %{
17568 predicate(UseAVX == 0);
17569 match(Set dst (MulD dst con));
17570 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17571 ins_cost(150);
17572 ins_encode %{
17573 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17574 %}
17575 ins_pipe(pipe_slow);
17576 %}
17577
17578 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17579 predicate(UseAVX > 0);
17580 match(Set dst (MulD src1 src2));
17581
17582 format %{ "vmulsd $dst, $src1, $src2" %}
17583 ins_cost(150);
17584 ins_encode %{
17585 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17586 %}
17587 ins_pipe(pipe_slow);
17588 %}
17589
17590 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17591 predicate(UseAVX > 0);
17592 match(Set dst (MulD src1 (LoadD src2)));
17593
17594 format %{ "vmulsd $dst, $src1, $src2" %}
17595 ins_cost(150);
17596 ins_encode %{
17597 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17598 %}
17599 ins_pipe(pipe_slow);
17600 %}
17601
17602 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17603 predicate(UseAVX > 0);
17604 match(Set dst (MulD src con));
17605
17606 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17607 ins_cost(150);
17608 ins_encode %{
17609 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17610 %}
17611 ins_pipe(pipe_slow);
17612 %}
17613
17614 instruct divF_reg(regF dst, regF src) %{
17615 predicate(UseAVX == 0);
17616 match(Set dst (DivF dst src));
17617
17618 format %{ "divss $dst, $src" %}
17619 ins_cost(150);
17620 ins_encode %{
17621 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17622 %}
17623 ins_pipe(pipe_slow);
17624 %}
17625
17626 instruct divF_mem(regF dst, memory src) %{
17627 predicate(UseAVX == 0);
17628 match(Set dst (DivF dst (LoadF src)));
17629
17630 format %{ "divss $dst, $src" %}
17631 ins_cost(150);
17632 ins_encode %{
17633 __ divss($dst$$XMMRegister, $src$$Address);
17634 %}
17635 ins_pipe(pipe_slow);
17636 %}
17637
17638 instruct divF_imm(regF dst, immF con) %{
17639 predicate(UseAVX == 0);
17640 match(Set dst (DivF dst con));
17641 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17642 ins_cost(150);
17643 ins_encode %{
17644 __ divss($dst$$XMMRegister, $constantaddress($con));
17645 %}
17646 ins_pipe(pipe_slow);
17647 %}
17648
17649 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17650 predicate(UseAVX > 0);
17651 match(Set dst (DivF src1 src2));
17652
17653 format %{ "vdivss $dst, $src1, $src2" %}
17654 ins_cost(150);
17655 ins_encode %{
17656 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17657 %}
17658 ins_pipe(pipe_slow);
17659 %}
17660
17661 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17662 predicate(UseAVX > 0);
17663 match(Set dst (DivF src1 (LoadF src2)));
17664
17665 format %{ "vdivss $dst, $src1, $src2" %}
17666 ins_cost(150);
17667 ins_encode %{
17668 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17669 %}
17670 ins_pipe(pipe_slow);
17671 %}
17672
17673 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17674 predicate(UseAVX > 0);
17675 match(Set dst (DivF src con));
17676
17677 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17678 ins_cost(150);
17679 ins_encode %{
17680 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17681 %}
17682 ins_pipe(pipe_slow);
17683 %}
17684
17685 instruct divD_reg(regD dst, regD src) %{
17686 predicate(UseAVX == 0);
17687 match(Set dst (DivD dst src));
17688
17689 format %{ "divsd $dst, $src" %}
17690 ins_cost(150);
17691 ins_encode %{
17692 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17693 %}
17694 ins_pipe(pipe_slow);
17695 %}
17696
17697 instruct divD_mem(regD dst, memory src) %{
17698 predicate(UseAVX == 0);
17699 match(Set dst (DivD dst (LoadD src)));
17700
17701 format %{ "divsd $dst, $src" %}
17702 ins_cost(150);
17703 ins_encode %{
17704 __ divsd($dst$$XMMRegister, $src$$Address);
17705 %}
17706 ins_pipe(pipe_slow);
17707 %}
17708
17709 instruct divD_imm(regD dst, immD con) %{
17710 predicate(UseAVX == 0);
17711 match(Set dst (DivD dst con));
17712 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17713 ins_cost(150);
17714 ins_encode %{
17715 __ divsd($dst$$XMMRegister, $constantaddress($con));
17716 %}
17717 ins_pipe(pipe_slow);
17718 %}
17719
17720 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17721 predicate(UseAVX > 0);
17722 match(Set dst (DivD src1 src2));
17723
17724 format %{ "vdivsd $dst, $src1, $src2" %}
17725 ins_cost(150);
17726 ins_encode %{
17727 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17728 %}
17729 ins_pipe(pipe_slow);
17730 %}
17731
17732 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17733 predicate(UseAVX > 0);
17734 match(Set dst (DivD src1 (LoadD src2)));
17735
17736 format %{ "vdivsd $dst, $src1, $src2" %}
17737 ins_cost(150);
17738 ins_encode %{
17739 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17740 %}
17741 ins_pipe(pipe_slow);
17742 %}
17743
17744 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17745 predicate(UseAVX > 0);
17746 match(Set dst (DivD src con));
17747
17748 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17749 ins_cost(150);
17750 ins_encode %{
17751 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17752 %}
17753 ins_pipe(pipe_slow);
17754 %}
17755
17756 instruct absF_reg(regF dst) %{
17757 predicate(UseAVX == 0);
17758 match(Set dst (AbsF dst));
17759 ins_cost(150);
17760 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17761 ins_encode %{
17762 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17763 %}
17764 ins_pipe(pipe_slow);
17765 %}
17766
17767 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17768 predicate(UseAVX > 0);
17769 match(Set dst (AbsF src));
17770 ins_cost(150);
17771 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17772 ins_encode %{
17773 int vlen_enc = Assembler::AVX_128bit;
17774 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17775 ExternalAddress(float_signmask()), vlen_enc);
17776 %}
17777 ins_pipe(pipe_slow);
17778 %}
17779
17780 instruct absD_reg(regD dst) %{
17781 predicate(UseAVX == 0);
17782 match(Set dst (AbsD dst));
17783 ins_cost(150);
17784 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17785 "# abs double by sign masking" %}
17786 ins_encode %{
17787 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17788 %}
17789 ins_pipe(pipe_slow);
17790 %}
17791
17792 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17793 predicate(UseAVX > 0);
17794 match(Set dst (AbsD src));
17795 ins_cost(150);
17796 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17797 "# abs double by sign masking" %}
17798 ins_encode %{
17799 int vlen_enc = Assembler::AVX_128bit;
17800 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17801 ExternalAddress(double_signmask()), vlen_enc);
17802 %}
17803 ins_pipe(pipe_slow);
17804 %}
17805
17806 instruct negF_reg(regF dst) %{
17807 predicate(UseAVX == 0);
17808 match(Set dst (NegF dst));
17809 ins_cost(150);
17810 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17811 ins_encode %{
17812 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17813 %}
17814 ins_pipe(pipe_slow);
17815 %}
17816
17817 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17818 predicate(UseAVX > 0);
17819 match(Set dst (NegF src));
17820 ins_cost(150);
17821 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17822 ins_encode %{
17823 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17824 ExternalAddress(float_signflip()));
17825 %}
17826 ins_pipe(pipe_slow);
17827 %}
17828
17829 instruct negD_reg(regD dst) %{
17830 predicate(UseAVX == 0);
17831 match(Set dst (NegD dst));
17832 ins_cost(150);
17833 format %{ "xorpd $dst, [0x8000000000000000]\t"
17834 "# neg double by sign flipping" %}
17835 ins_encode %{
17836 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17837 %}
17838 ins_pipe(pipe_slow);
17839 %}
17840
17841 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17842 predicate(UseAVX > 0);
17843 match(Set dst (NegD src));
17844 ins_cost(150);
17845 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17846 "# neg double by sign flipping" %}
17847 ins_encode %{
17848 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17849 ExternalAddress(double_signflip()));
17850 %}
17851 ins_pipe(pipe_slow);
17852 %}
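
// Worked example of the sign-mask tricks above (single precision):
//   -3.5f = 0xC0600000
//   abs: 0xC0600000 AND 0x7FFFFFFF = 0x40600000 =  3.5f  (sign bit cleared)
//   neg: 0x40600000 XOR 0x80000000 = 0xC0600000 = -3.5f  (sign bit flipped)
// The double rules work the same way with the 64-bit masks.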
17853
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17856 instruct sqrtF_reg(regF dst) %{
17857 match(Set dst (SqrtF dst));
17858 format %{ "sqrtss $dst, $dst" %}
17859 ins_encode %{
17860 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17861 %}
17862 ins_pipe(pipe_slow);
17863 %}
17864
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17867 instruct sqrtD_reg(regD dst) %{
17868 match(Set dst (SqrtD dst));
17869 format %{ "sqrtsd $dst, $dst" %}
17870 ins_encode %{
17871 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17872 %}
17873 ins_pipe(pipe_slow);
17874 %}
17875
17876 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17877 effect(TEMP tmp);
17878 match(Set dst (ConvF2HF src));
17879 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17881 ins_encode %{
17882 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17883 %}
17884 ins_pipe( pipe_slow );
17885 %}
17886
17887 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17888 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17889 effect(TEMP ktmp, TEMP rtmp);
17890 match(Set mem (StoreC mem (ConvF2HF src)));
17891 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
17892 ins_encode %{
17893 __ movl($rtmp$$Register, 0x1);
17894 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17895 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17896 %}
17897 ins_pipe( pipe_slow );
17898 %}
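
// In the rule above, the 0x1 opmask restricts the masked evcvtps2ph store to
// element 0, so only a single half-float (2 bytes) is written to $mem rather
// than all four converted lanes (8 bytes).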
17899
17900 instruct vconvF2HF(vec dst, vec src) %{
17901 match(Set dst (VectorCastF2HF src));
17902 format %{ "vector_conv_F2HF $dst $src" %}
17903 ins_encode %{
17904 int vlen_enc = vector_length_encoding(this, $src);
17905 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17906 %}
17907 ins_pipe( pipe_slow );
17908 %}
17909
17910 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17911 predicate(n->as_StoreVector()->memory_size() >= 16);
17912 match(Set mem (StoreVector mem (VectorCastF2HF src)));
17913 format %{ "vcvtps2ph $mem,$src" %}
17914 ins_encode %{
17915 int vlen_enc = vector_length_encoding(this, $src);
17916 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17917 %}
17918 ins_pipe( pipe_slow );
17919 %}
17920
17921 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17922 match(Set dst (ConvHF2F src));
17923 format %{ "vcvtph2ps $dst,$src" %}
17924 ins_encode %{
17925 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17926 %}
17927 ins_pipe( pipe_slow );
17928 %}
17929
17930 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17931 match(Set dst (VectorCastHF2F (LoadVector mem)));
17932 format %{ "vcvtph2ps $dst,$mem" %}
17933 ins_encode %{
17934 int vlen_enc = vector_length_encoding(this);
17935 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17936 %}
17937 ins_pipe( pipe_slow );
17938 %}
17939
17940 instruct vconvHF2F(vec dst, vec src) %{
17941 match(Set dst (VectorCastHF2F src));
17942 ins_cost(125);
17943 format %{ "vector_conv_HF2F $dst,$src" %}
17944 ins_encode %{
17945 int vlen_enc = vector_length_encoding(this);
17946 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17947 %}
17948 ins_pipe( pipe_slow );
17949 %}
17950
17951 // ---------------------------------------- VectorReinterpret ------------------------------------
17952 instruct reinterpret_mask(kReg dst) %{
17953 predicate(n->bottom_type()->isa_vectmask() &&
17954 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17955 match(Set dst (VectorReinterpret dst));
17956 ins_cost(125);
17957 format %{ "vector_reinterpret $dst\t!" %}
17958 ins_encode %{
17959 // empty
17960 %}
17961 ins_pipe( pipe_slow );
17962 %}
17963
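// A kReg mask has one bit per lane, so reinterpreting a mask over N shorts
// (or ints, or longs) as a mask over the same bytes changes the lane count.
// The rules below expand the mask into a vector with one all-ones/all-zeros
// lane per mask bit (evpmovm2w/d/q) and re-extract it at byte granularity
// (evpmovb2m), which replicates each source mask bit across 2, 4, or 8
// byte-lane bits as required.
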
17964 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17965 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17966 n->bottom_type()->isa_vectmask() &&
17967 n->in(1)->bottom_type()->isa_vectmask() &&
17968 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
17970 match(Set dst (VectorReinterpret src));
17971 effect(TEMP xtmp);
17972 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17973 ins_encode %{
17974 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17975 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
17977 int vlen_enc = vector_length_encoding(src_sz);
17978 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17979 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17980 %}
17981 ins_pipe( pipe_slow );
17982 %}
17983
17984 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
17985 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17986 n->bottom_type()->isa_vectmask() &&
17987 n->in(1)->bottom_type()->isa_vectmask() &&
17988 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
17989 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
17991 match(Set dst (VectorReinterpret src));
17992 effect(TEMP xtmp);
17993 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
17994 ins_encode %{
17995 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
17996 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
17998 int vlen_enc = vector_length_encoding(src_sz);
17999 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18000 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18001 %}
18002 ins_pipe( pipe_slow );
18003 %}
18004
18005 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18006 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18007 n->bottom_type()->isa_vectmask() &&
18008 n->in(1)->bottom_type()->isa_vectmask() &&
18009 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18010 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18012 match(Set dst (VectorReinterpret src));
18013 effect(TEMP xtmp);
18014 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18015 ins_encode %{
18016 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18017 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18019 int vlen_enc = vector_length_encoding(src_sz);
18020 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18021 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18022 %}
18023 ins_pipe( pipe_slow );
18024 %}
18025
18026 instruct reinterpret(vec dst) %{
18027 predicate(!n->bottom_type()->isa_vectmask() &&
18028 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18029 match(Set dst (VectorReinterpret dst));
18030 ins_cost(125);
18031 format %{ "vector_reinterpret $dst\t!" %}
18032 ins_encode %{
18033 // empty
18034 %}
18035 ins_pipe( pipe_slow );
18036 %}
18037
18038 instruct reinterpret_expand(vec dst, vec src) %{
18039 predicate(UseAVX == 0 &&
18040 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18041 match(Set dst (VectorReinterpret src));
18042 ins_cost(125);
18043 effect(TEMP dst);
18044 format %{ "vector_reinterpret_expand $dst,$src" %}
18045 ins_encode %{
18046 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18047 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18048
18049 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18050 if (src_vlen_in_bytes == 4) {
18051 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18052 } else {
18053 assert(src_vlen_in_bytes == 8, "");
18054 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18055 }
18056 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18057 %}
18058 ins_pipe( pipe_slow );
18059 %}
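
// The expanding reinterpret above must zero-extend: bytes beyond the source
// size read as zero in the wider vector, hence the AND with a 32- or 64-bit
// low-mask from the constant area instead of a plain move.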
18060
18061 instruct vreinterpret_expand4(legVec dst, vec src) %{
18062 predicate(UseAVX > 0 &&
18063 !n->bottom_type()->isa_vectmask() &&
18064 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18065 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18066 match(Set dst (VectorReinterpret src));
18067 ins_cost(125);
18068 format %{ "vector_reinterpret_expand $dst,$src" %}
18069 ins_encode %{
18070 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18071 %}
18072 ins_pipe( pipe_slow );
18073 %}
18074
18075
18076 instruct vreinterpret_expand(legVec dst, vec src) %{
18077 predicate(UseAVX > 0 &&
18078 !n->bottom_type()->isa_vectmask() &&
18079 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18080 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18081 match(Set dst (VectorReinterpret src));
18082 ins_cost(125);
18083 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18084 ins_encode %{
18085 switch (Matcher::vector_length_in_bytes(this, $src)) {
18086 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18087 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18088 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18089 default: ShouldNotReachHere();
18090 }
18091 %}
18092 ins_pipe( pipe_slow );
18093 %}
18094
18095 instruct reinterpret_shrink(vec dst, legVec src) %{
18096 predicate(!n->bottom_type()->isa_vectmask() &&
18097 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18098 match(Set dst (VectorReinterpret src));
18099 ins_cost(125);
18100 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18101 ins_encode %{
18102 switch (Matcher::vector_length_in_bytes(this)) {
18103 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18104 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18105 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18106 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18107 default: ShouldNotReachHere();
18108 }
18109 %}
18110 ins_pipe( pipe_slow );
18111 %}
18112
18113 // ----------------------------------------------------------------------------------------------------
18114
18115 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18116 match(Set dst (RoundDoubleMode src rmode));
18117 format %{ "roundsd $dst,$src" %}
18118 ins_cost(150);
18119 ins_encode %{
18120 assert(UseSSE >= 4, "required");
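    // With plain SSE, roundsd writes only the low 64 bits of dst and leaves
    // the rest untouched, creating a false dependency on dst's previous
    // contents; zero it first to break that dependency.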
18121 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18122 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18123 }
18124 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18125 %}
18126 ins_pipe(pipe_slow);
18127 %}
18128
18129 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18130 match(Set dst (RoundDoubleMode con rmode));
18131 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18132 ins_cost(150);
18133 ins_encode %{
18134 assert(UseSSE >= 4, "required");
18135 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18136 %}
18137 ins_pipe(pipe_slow);
18138 %}
18139
18140 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18141 predicate(Matcher::vector_length(n) < 8);
18142 match(Set dst (RoundDoubleModeV src rmode));
18143 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18144 ins_encode %{
18145 assert(UseAVX > 0, "required");
18146 int vlen_enc = vector_length_encoding(this);
18147 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18148 %}
18149 ins_pipe( pipe_slow );
18150 %}
18151
18152 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18153 predicate(Matcher::vector_length(n) == 8);
18154 match(Set dst (RoundDoubleModeV src rmode));
18155 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18156 ins_encode %{
18157 assert(UseAVX > 2, "required");
18158 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18159 %}
18160 ins_pipe( pipe_slow );
18161 %}
18162
18163 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18164 predicate(Matcher::vector_length(n) < 8);
18165 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18166 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18167 ins_encode %{
18168 assert(UseAVX > 0, "required");
18169 int vlen_enc = vector_length_encoding(this);
18170 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18171 %}
18172 ins_pipe( pipe_slow );
18173 %}
18174
18175 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18176 predicate(Matcher::vector_length(n) == 8);
18177 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18178 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18179 ins_encode %{
18180 assert(UseAVX > 2, "required");
18181 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18182 %}
18183 ins_pipe( pipe_slow );
18184 %}
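
// In the rounding rules above, the 8-bit $rmode constant is passed straight
// through as the ROUNDSD/VROUNDPD/VRNDSCALEPD immediate, which selects the
// rounding mode (nearest-even, down, up, truncate toward zero) per the
// SSE4.1/AVX-512 immediate encodings.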
18185
18186 instruct onspinwait() %{
18187 match(OnSpinWait);
18188 ins_cost(200);
18189
18190 format %{
18191 $$template
18192 $$emit$$"pause\t! membar_onspinwait"
18193 %}
18194 ins_encode %{
18195 __ pause();
18196 %}
18197 ins_pipe(pipe_slow);
18198 %}
18199
18200 // a * b + c
18201 instruct fmaD_reg(regD a, regD b, regD c) %{
18202 match(Set c (FmaD c (Binary a b)));
18203 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18204 ins_cost(150);
18205 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18207 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18208 %}
18209 ins_pipe( pipe_slow );
18210 %}
18211
18212 // a * b + c
18213 instruct fmaF_reg(regF a, regF b, regF c) %{
18214 match(Set c (FmaF c (Binary a b)));
18215 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18216 ins_cost(150);
18217 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18219 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18220 %}
18221 ins_pipe( pipe_slow );
18222 %}
18223
18224 // ====================VECTOR INSTRUCTIONS=====================================
18225
18226 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18227 instruct MoveVec2Leg(legVec dst, vec src) %{
18228 match(Set dst src);
18229 format %{ "" %}
18230 ins_encode %{
18231 ShouldNotReachHere();
18232 %}
18233 ins_pipe( fpu_reg_reg );
18234 %}
18235
18236 instruct MoveLeg2Vec(vec dst, legVec src) %{
18237 match(Set dst src);
18238 format %{ "" %}
18239 ins_encode %{
18240 ShouldNotReachHere();
18241 %}
18242 ins_pipe( fpu_reg_reg );
18243 %}
18244
18245 // ============================================================================
18246
18247 // Load vectors generic operand pattern
18248 instruct loadV(vec dst, memory mem) %{
18249 match(Set dst (LoadVector mem));
18250 ins_cost(125);
18251 format %{ "load_vector $dst,$mem" %}
18252 ins_encode %{
18253 BasicType bt = Matcher::vector_element_basic_type(this);
18254 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18255 %}
18256 ins_pipe( pipe_slow );
18257 %}
18258
18259 // Store vectors generic operand pattern.
18260 instruct storeV(memory mem, vec src) %{
18261 match(Set mem (StoreVector mem src));
18262 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18264 ins_encode %{
18265 switch (Matcher::vector_length_in_bytes(this, $src)) {
18266 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18267 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18268 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18269 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18270 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18271 default: ShouldNotReachHere();
18272 }
18273 %}
18274 ins_pipe( pipe_slow );
18275 %}
18276
18277 // ---------------------------------------- Gather ------------------------------------
18278
18279 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18280
18281 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18282 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18283 Matcher::vector_length_in_bytes(n) <= 32);
18284 match(Set dst (LoadVectorGather mem idx));
18285 effect(TEMP dst, TEMP tmp, TEMP mask);
18286 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18287 ins_encode %{
18288 int vlen_enc = vector_length_encoding(this);
18289 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18290 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18291 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18292 __ lea($tmp$$Register, $mem$$Address);
18293 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18294 %}
18295 ins_pipe( pipe_slow );
18296 %}
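
// Note on the mask setup above: vpcmpeqd with identical operands sets every
// lane of $mask to all-ones (each lane compares equal to itself), i.e. an
// unconditional gather. The AVX2 gather instructions clobber their vector
// mask, which is why $mask is declared TEMP.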
18297
18298
18299 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18300 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18301 !is_subword_type(Matcher::vector_element_basic_type(n)));
18302 match(Set dst (LoadVectorGather mem idx));
18303 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18305 ins_encode %{
18306 int vlen_enc = vector_length_encoding(this);
18307 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18308 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18309 __ lea($tmp$$Register, $mem$$Address);
18310 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18311 %}
18312 ins_pipe( pipe_slow );
18313 %}
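
// kxnorwl with identical sources (k XNOR k) is the all-ones idiom for opmask
// registers, so the gather above is unconditional across all lanes.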
18314
18315 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18316 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18317 !is_subword_type(Matcher::vector_element_basic_type(n)));
18318 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18319 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18321 ins_encode %{
18322 assert(UseAVX > 2, "sanity");
18323 int vlen_enc = vector_length_encoding(this);
18324 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18325 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first moved to a temporary.
18328 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18329 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18330 __ lea($tmp$$Register, $mem$$Address);
18331 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18332 %}
18333 ins_pipe( pipe_slow );
18334 %}
18335
18336 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18337 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18338 match(Set dst (LoadVectorGather mem idx_base));
18339 effect(TEMP tmp, TEMP rtmp);
18340 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18341 ins_encode %{
18342 int vlen_enc = vector_length_encoding(this);
18343 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18344 __ lea($tmp$$Register, $mem$$Address);
18345 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18346 %}
18347 ins_pipe( pipe_slow );
18348 %}
18349
18350 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18351 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18352 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18353 match(Set dst (LoadVectorGather mem idx_base));
18354 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18355 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18356 ins_encode %{
18357 int vlen_enc = vector_length_encoding(this);
18358 int vector_len = Matcher::vector_length(this);
18359 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18360 __ lea($tmp$$Register, $mem$$Address);
18361 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18362 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18363 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18364 %}
18365 ins_pipe( pipe_slow );
18366 %}
18367
18368 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18369 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18370 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18371 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18372 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18373 ins_encode %{
18374 int vlen_enc = vector_length_encoding(this);
18375 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18376 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18377 __ lea($tmp$$Register, $mem$$Address);
18378 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18379 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18380 %}
18381 ins_pipe( pipe_slow );
18382 %}
18383
18384 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18385 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18386 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18387 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18388 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18389 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18390 ins_encode %{
18391 int vlen_enc = vector_length_encoding(this);
18392 int vector_len = Matcher::vector_length(this);
18393 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18394 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18395 __ lea($tmp$$Register, $mem$$Address);
18396 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18397 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18398 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18399 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18400 %}
18401 ins_pipe( pipe_slow );
18402 %}
18403
18404 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18405 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18406 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18407 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18408 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18409 ins_encode %{
18410 int vlen_enc = vector_length_encoding(this);
18411 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18412 __ lea($tmp$$Register, $mem$$Address);
18413 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18414 if (elem_bt == T_SHORT) {
18415 __ movl($mask_idx$$Register, 0x55555555);
18416 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18417 }
18418 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18419 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18420 %}
18421 ins_pipe( pipe_slow );
18422 %}
18423
18424 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18425 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18426 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18427 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18428 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18429 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18430 ins_encode %{
18431 int vlen_enc = vector_length_encoding(this);
18432 int vector_len = Matcher::vector_length(this);
18433 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18434 __ lea($tmp$$Register, $mem$$Address);
18435 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18436 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18437 if (elem_bt == T_SHORT) {
18438 __ movl($mask_idx$$Register, 0x55555555);
18439 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18440 }
18441 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18442 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18443 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18444 %}
18445 ins_pipe( pipe_slow );
18446 %}
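
// In the AVX2-masked rules above, vpmovmskb extracts one mask bit per byte.
// For T_SHORT every lane contributes two identical bits, so pext with the
// pattern 0x55555555 keeps every other bit and compresses the result to one
// bit per short lane.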
18447
18448 // ====================Scatter=======================================
18449
18450 // Scatter INT, LONG, FLOAT, DOUBLE
18451
18452 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18453 predicate(UseAVX > 2);
18454 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18455 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18457 ins_encode %{
18458 int vlen_enc = vector_length_encoding(this, $src);
18459 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18460
18461 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18462 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18463
18464 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18465 __ lea($tmp$$Register, $mem$$Address);
18466 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18467 %}
18468 ins_pipe( pipe_slow );
18469 %}
18470
18471 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18472 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18473 effect(TEMP tmp, TEMP ktmp);
18474 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18475 ins_encode %{
18476 int vlen_enc = vector_length_encoding(this, $src);
18477 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18478 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18479 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is first moved to a temporary.
18482 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18483 __ lea($tmp$$Register, $mem$$Address);
18484 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18485 %}
18486 ins_pipe( pipe_slow );
18487 %}
18488
18489 // ====================REPLICATE=======================================
18490
18491 // Replicate byte scalar to be vector
18492 instruct vReplB_reg(vec dst, rRegI src) %{
18493 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18494 match(Set dst (Replicate src));
18495 format %{ "replicateB $dst,$src" %}
18496 ins_encode %{
18497 uint vlen = Matcher::vector_length(this);
18498 if (UseAVX >= 2) {
18499 int vlen_enc = vector_length_encoding(this);
18500 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18501 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18502 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18503 } else {
18504 __ movdl($dst$$XMMRegister, $src$$Register);
18505 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18506 }
18507 } else {
18508 assert(UseAVX < 2, "");
18509 __ movdl($dst$$XMMRegister, $src$$Register);
18510 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18511 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18512 if (vlen >= 16) {
18513 assert(vlen == 16, "");
18514 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18515 }
18516 }
18517 %}
18518 ins_pipe( pipe_slow );
18519 %}
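
// The SSE fallback above broadcasts a byte b into a 16-byte vector step by
// step:
//   movdl      xmm, r          // low dword   = ...b
//   punpcklbw  xmm, xmm        // low word    = bb
//   pshuflw    xmm, xmm, 0x00  // low qword   = bbbbbbbb
//   punpcklqdq xmm, xmm        // all 16 bytes = b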
18520
18521 instruct ReplB_mem(vec dst, memory mem) %{
18522 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18523 match(Set dst (Replicate (LoadB mem)));
18524 format %{ "replicateB $dst,$mem" %}
18525 ins_encode %{
18526 int vlen_enc = vector_length_encoding(this);
18527 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18528 %}
18529 ins_pipe( pipe_slow );
18530 %}
18531
18532 // ====================ReplicateS=======================================
18533
18534 instruct vReplS_reg(vec dst, rRegI src) %{
18535 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18536 match(Set dst (Replicate src));
18537 format %{ "replicateS $dst,$src" %}
18538 ins_encode %{
18539 uint vlen = Matcher::vector_length(this);
18540 int vlen_enc = vector_length_encoding(this);
18541 if (UseAVX >= 2) {
18542 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18543 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18544 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18545 } else {
18546 __ movdl($dst$$XMMRegister, $src$$Register);
18547 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18548 }
18549 } else {
18550 assert(UseAVX < 2, "");
18551 __ movdl($dst$$XMMRegister, $src$$Register);
18552 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18553 if (vlen >= 8) {
18554 assert(vlen == 8, "");
18555 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18556 }
18557 }
18558 %}
18559 ins_pipe( pipe_slow );
18560 %}
18561
18562 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18563 match(Set dst (Replicate con));
18564 effect(TEMP rtmp);
18565 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18566 ins_encode %{
18567 int vlen_enc = vector_length_encoding(this);
18568 BasicType bt = Matcher::vector_element_basic_type(this);
18569 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18570 __ movl($rtmp$$Register, $con$$constant);
18571 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18572 %}
18573 ins_pipe( pipe_slow );
18574 %}
18575
18576 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18577 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18578 match(Set dst (Replicate src));
18579 effect(TEMP rtmp);
18580 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18581 ins_encode %{
18582 int vlen_enc = vector_length_encoding(this);
18583 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18584 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18585 %}
18586 ins_pipe( pipe_slow );
18587 %}
18588
18589 instruct ReplS_mem(vec dst, memory mem) %{
18590 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18591 match(Set dst (Replicate (LoadS mem)));
18592 format %{ "replicateS $dst,$mem" %}
18593 ins_encode %{
18594 int vlen_enc = vector_length_encoding(this);
18595 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18596 %}
18597 ins_pipe( pipe_slow );
18598 %}
18599
18600 // ====================ReplicateI=======================================
18601
18602 instruct ReplI_reg(vec dst, rRegI src) %{
18603 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18604 match(Set dst (Replicate src));
18605 format %{ "replicateI $dst,$src" %}
18606 ins_encode %{
18607 uint vlen = Matcher::vector_length(this);
18608 int vlen_enc = vector_length_encoding(this);
18609 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18610 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18611 } else if (VM_Version::supports_avx2()) {
18612 __ movdl($dst$$XMMRegister, $src$$Register);
18613 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18614 } else {
18615 __ movdl($dst$$XMMRegister, $src$$Register);
18616 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18617 }
18618 %}
18619 ins_pipe( pipe_slow );
18620 %}
18621
18622 instruct ReplI_mem(vec dst, memory mem) %{
18623 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18624 match(Set dst (Replicate (LoadI mem)));
18625 format %{ "replicateI $dst,$mem" %}
18626 ins_encode %{
18627 int vlen_enc = vector_length_encoding(this);
18628 if (VM_Version::supports_avx2()) {
18629 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18630 } else if (VM_Version::supports_avx()) {
18631 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18632 } else {
18633 __ movdl($dst$$XMMRegister, $mem$$Address);
18634 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18635 }
18636 %}
18637 ins_pipe( pipe_slow );
18638 %}
18639
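// About the copy count handed to vreplicate_imm below: the numerator (4 with
// AVX, 8 with SSE3, otherwise 16) is presumably the smallest chunk, in bytes,
// that load_constant_vector can broadcast from the constant table (compare the
// vbroadcastss/movddup usage in the memory rules), so dividing by the element
// size yields how many copies of the immediate must be materialized. For
// example, a T_BYTE constant on AVX needs 4/1 = 4 copies.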
18640 instruct ReplI_imm(vec dst, immI con) %{
18641 predicate(Matcher::is_non_long_integral_vector(n));
18642 match(Set dst (Replicate con));
18643 format %{ "replicateI $dst,$con" %}
18644 ins_encode %{
18645 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18646 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18647 type2aelembytes(Matcher::vector_element_basic_type(this))));
18648 BasicType bt = Matcher::vector_element_basic_type(this);
18649 int vlen = Matcher::vector_length_in_bytes(this);
18650 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18651 %}
18652 ins_pipe( pipe_slow );
18653 %}
18654
// Replicate scalar zero to form a vector
18656 instruct ReplI_zero(vec dst, immI_0 zero) %{
18657 predicate(Matcher::is_non_long_integral_vector(n));
18658 match(Set dst (Replicate zero));
18659 format %{ "replicateI $dst,$zero" %}
18660 ins_encode %{
18661 int vlen_enc = vector_length_encoding(this);
18662 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18663 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18664 } else {
18665 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18666 }
18667 %}
18668 ins_pipe( fpu_reg_reg );
18669 %}
18670
18671 instruct ReplI_M1(vec dst, immI_M1 con) %{
18672 predicate(Matcher::is_non_long_integral_vector(n));
18673 match(Set dst (Replicate con));
18674 format %{ "vallones $dst" %}
18675 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
18678 %}
18679 ins_pipe( pipe_slow );
18680 %}
18681
18682 // ====================ReplicateL=======================================
18683
// Replicate long (8 byte) scalar to form a vector
18685 instruct ReplL_reg(vec dst, rRegL src) %{
18686 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18687 match(Set dst (Replicate src));
18688 format %{ "replicateL $dst,$src" %}
18689 ins_encode %{
18690 int vlen = Matcher::vector_length(this);
18691 int vlen_enc = vector_length_encoding(this);
18692 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18693 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18694 } else if (VM_Version::supports_avx2()) {
18695 __ movdq($dst$$XMMRegister, $src$$Register);
18696 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18697 } else {
18698 __ movdq($dst$$XMMRegister, $src$$Register);
18699 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18700 }
18701 %}
18702 ins_pipe( pipe_slow );
18703 %}
18704
18705 instruct ReplL_mem(vec dst, memory mem) %{
18706 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18707 match(Set dst (Replicate (LoadL mem)));
18708 format %{ "replicateL $dst,$mem" %}
18709 ins_encode %{
18710 int vlen_enc = vector_length_encoding(this);
18711 if (VM_Version::supports_avx2()) {
18712 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18713 } else if (VM_Version::supports_sse3()) {
18714 __ movddup($dst$$XMMRegister, $mem$$Address);
18715 } else {
18716 __ movq($dst$$XMMRegister, $mem$$Address);
18717 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18718 }
18719 %}
18720 ins_pipe( pipe_slow );
18721 %}
18722
// Replicate long (8 byte) scalar immediate to form a vector by loading it from the constant table.
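// A single 8-byte copy suffices when SSE3 movddup can splat it (cf. ReplL_mem
// above); without SSE3, two copies are laid down so a plain 16-byte load works.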
18724 instruct ReplL_imm(vec dst, immL con) %{
18725 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18726 match(Set dst (Replicate con));
18727 format %{ "replicateL $dst,$con" %}
18728 ins_encode %{
18729 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18730 int vlen = Matcher::vector_length_in_bytes(this);
18731 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18732 %}
18733 ins_pipe( pipe_slow );
18734 %}
18735
18736 instruct ReplL_zero(vec dst, immL0 zero) %{
18737 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18738 match(Set dst (Replicate zero));
18739 format %{ "replicateL $dst,$zero" %}
18740 ins_encode %{
18741 int vlen_enc = vector_length_encoding(this);
18742 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18743 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18744 } else {
18745 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18746 }
18747 %}
18748 ins_pipe( fpu_reg_reg );
18749 %}
18750
18751 instruct ReplL_M1(vec dst, immL_M1 con) %{
18752 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18753 match(Set dst (Replicate con));
18754 format %{ "vallones $dst" %}
18755 ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vlen_enc);
18758 %}
18759 ins_pipe( pipe_slow );
18760 %}
18761
18762 // ====================ReplicateF=======================================
18763
18764 instruct vReplF_reg(vec dst, vlRegF src) %{
18765 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18766 match(Set dst (Replicate src));
18767 format %{ "replicateF $dst,$src" %}
18768 ins_encode %{
18769 uint vlen = Matcher::vector_length(this);
18770 int vlen_enc = vector_length_encoding(this);
18771 if (vlen <= 4) {
18772 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18773 } else if (VM_Version::supports_avx2()) {
18774 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18775 } else {
18776 assert(vlen == 8, "sanity");
18777 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18778 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18779 }
18780 %}
18781 ins_pipe( pipe_slow );
18782 %}
18783
18784 instruct ReplF_reg(vec dst, vlRegF src) %{
18785 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18786 match(Set dst (Replicate src));
18787 format %{ "replicateF $dst,$src" %}
18788 ins_encode %{
18789 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18790 %}
18791 ins_pipe( pipe_slow );
18792 %}
18793
18794 instruct ReplF_mem(vec dst, memory mem) %{
18795 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18796 match(Set dst (Replicate (LoadF mem)));
18797 format %{ "replicateF $dst,$mem" %}
18798 ins_encode %{
18799 int vlen_enc = vector_length_encoding(this);
18800 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18801 %}
18802 ins_pipe( pipe_slow );
18803 %}
18804
// Replicate float scalar immediate to form a vector by loading it from the constant table.
18806 instruct ReplF_imm(vec dst, immF con) %{
18807 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18808 match(Set dst (Replicate con));
18809 format %{ "replicateF $dst,$con" %}
18810 ins_encode %{
18811 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18812 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18813 int vlen = Matcher::vector_length_in_bytes(this);
18814 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18815 %}
18816 ins_pipe( pipe_slow );
18817 %}
18818
18819 instruct ReplF_zero(vec dst, immF0 zero) %{
18820 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18821 match(Set dst (Replicate zero));
18822 format %{ "replicateF $dst,$zero" %}
18823 ins_encode %{
18824 int vlen_enc = vector_length_encoding(this);
18825 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18826 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18827 } else {
18828 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18829 }
18830 %}
18831 ins_pipe( fpu_reg_reg );
18832 %}
18833
18834 // ====================ReplicateD=======================================
18835
// Replicate double (8 byte) scalar to form a vector
18837 instruct vReplD_reg(vec dst, vlRegD src) %{
18838 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18839 match(Set dst (Replicate src));
18840 format %{ "replicateD $dst,$src" %}
18841 ins_encode %{
18842 uint vlen = Matcher::vector_length(this);
18843 int vlen_enc = vector_length_encoding(this);
18844 if (vlen <= 2) {
18845 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18846 } else if (VM_Version::supports_avx2()) {
18847 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18848 } else {
18849 assert(vlen == 4, "sanity");
18850 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18851 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18852 }
18853 %}
18854 ins_pipe( pipe_slow );
18855 %}
18856
18857 instruct ReplD_reg(vec dst, vlRegD src) %{
18858 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18859 match(Set dst (Replicate src));
18860 format %{ "replicateD $dst,$src" %}
18861 ins_encode %{
18862 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18863 %}
18864 ins_pipe( pipe_slow );
18865 %}
18866
18867 instruct ReplD_mem(vec dst, memory mem) %{
18868 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18869 match(Set dst (Replicate (LoadD mem)));
18870 format %{ "replicateD $dst,$mem" %}
18871 ins_encode %{
18872 if (Matcher::vector_length(this) >= 4) {
18873 int vlen_enc = vector_length_encoding(this);
18874 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18875 } else {
18876 __ movddup($dst$$XMMRegister, $mem$$Address);
18877 }
18878 %}
18879 ins_pipe( pipe_slow );
18880 %}
18881
// Replicate double (8 byte) scalar immediate to form a vector by loading it from the constant table.
18883 instruct ReplD_imm(vec dst, immD con) %{
18884 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18885 match(Set dst (Replicate con));
18886 format %{ "replicateD $dst,$con" %}
18887 ins_encode %{
18888 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18889 int vlen = Matcher::vector_length_in_bytes(this);
18890 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18891 %}
18892 ins_pipe( pipe_slow );
18893 %}
18894
18895 instruct ReplD_zero(vec dst, immD0 zero) %{
18896 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18897 match(Set dst (Replicate zero));
18898 format %{ "replicateD $dst,$zero" %}
18899 ins_encode %{
18900 int vlen_enc = vector_length_encoding(this);
18901 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18902 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18903 } else {
18904 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18905 }
18906 %}
18907 ins_pipe( fpu_reg_reg );
18908 %}
18909
18910 // ====================VECTOR INSERT=======================================
18911
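// All insert rules share one scheme: x_idx addresses the element within its
// 128-bit lane and y_idx selects the lane. Wide vectors are handled by
// extracting the affected lane, inserting the scalar into it, and writing the
// lane back. For example, inserting into an 8-element int vector at idx = 5:
// elem_per_lane = 4, so x_idx = 5 & 3 = 1 and y_idx = 1; the upper lane is
// extracted, the value inserted at element 1, and the lane re-inserted.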
18912 instruct insert(vec dst, rRegI val, immU8 idx) %{
18913 predicate(Matcher::vector_length_in_bytes(n) < 32);
18914 match(Set dst (VectorInsert (Binary dst val) idx));
18915 format %{ "vector_insert $dst,$val,$idx" %}
18916 ins_encode %{
18917 assert(UseSSE >= 4, "required");
18918 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18919
18920 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18921
18922 assert(is_integral_type(elem_bt), "");
18923 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18924
18925 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18926 %}
18927 ins_pipe( pipe_slow );
18928 %}
18929
18930 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18931 predicate(Matcher::vector_length_in_bytes(n) == 32);
18932 match(Set dst (VectorInsert (Binary src val) idx));
18933 effect(TEMP vtmp);
18934 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18935 ins_encode %{
18937 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18938 int elem_per_lane = 16/type2aelembytes(elem_bt);
18939 int log2epr = log2(elem_per_lane);
18940
18941 assert(is_integral_type(elem_bt), "sanity");
18942 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18943
18944 uint x_idx = $idx$$constant & right_n_bits(log2epr);
18945 uint y_idx = ($idx$$constant >> log2epr) & 1;
18946 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18947 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18948 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18949 %}
18950 ins_pipe( pipe_slow );
18951 %}
18952
18953 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18954 predicate(Matcher::vector_length_in_bytes(n) == 64);
18955 match(Set dst (VectorInsert (Binary src val) idx));
18956 effect(TEMP vtmp);
18957 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18958 ins_encode %{
18959 assert(UseAVX > 2, "sanity");
18960
18961 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18962 int elem_per_lane = 16/type2aelembytes(elem_bt);
18963 int log2epr = log2(elem_per_lane);
18964
18965 assert(is_integral_type(elem_bt), "");
18966 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18967
18968 uint x_idx = $idx$$constant & right_n_bits(log2epr);
18969 uint y_idx = ($idx$$constant >> log2epr) & 3;
18970 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18971 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18972 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18973 %}
18974 ins_pipe( pipe_slow );
18975 %}
18976
18977 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18978 predicate(Matcher::vector_length(n) == 2);
18979 match(Set dst (VectorInsert (Binary dst val) idx));
18980 format %{ "vector_insert $dst,$val,$idx" %}
18981 ins_encode %{
18982 assert(UseSSE >= 4, "required");
18983 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18984 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18985
18986 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
18987 %}
18988 ins_pipe( pipe_slow );
18989 %}
18990
18991 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
18992 predicate(Matcher::vector_length(n) == 4);
18993 match(Set dst (VectorInsert (Binary src val) idx));
18994 effect(TEMP vtmp);
18995 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18996 ins_encode %{
18997 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18998 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18999
19000 uint x_idx = $idx$$constant & right_n_bits(1);
19001 uint y_idx = ($idx$$constant >> 1) & 1;
19003 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19004 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19005 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19006 %}
19007 ins_pipe( pipe_slow );
19008 %}
19009
19010 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19011 predicate(Matcher::vector_length(n) == 8);
19012 match(Set dst (VectorInsert (Binary src val) idx));
19013 effect(TEMP vtmp);
19014 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19015 ins_encode %{
19016 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19017 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19018
19019 uint x_idx = $idx$$constant & right_n_bits(1);
19020 uint y_idx = ($idx$$constant >> 1) & 3;
19021 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19022 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19023 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19024 %}
19025 ins_pipe( pipe_slow );
19026 %}
19027
19028 instruct insertF(vec dst, regF val, immU8 idx) %{
19029 predicate(Matcher::vector_length(n) < 8);
19030 match(Set dst (VectorInsert (Binary dst val) idx));
19031 format %{ "vector_insert $dst,$val,$idx" %}
19032 ins_encode %{
19033 assert(UseSSE >= 4, "sanity");
19034
19035 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19036 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19037
19038 uint x_idx = $idx$$constant & right_n_bits(2);
19039 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19040 %}
19041 ins_pipe( pipe_slow );
19042 %}
19043
19044 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19045 predicate(Matcher::vector_length(n) >= 8);
19046 match(Set dst (VectorInsert (Binary src val) idx));
19047 effect(TEMP vtmp);
19048 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19049 ins_encode %{
19050 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19051 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19052
19053 int vlen = Matcher::vector_length(this);
19054 uint x_idx = $idx$$constant & right_n_bits(2);
19055 if (vlen == 8) {
19056 uint y_idx = ($idx$$constant >> 2) & 1;
19058 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19059 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19060 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19061 } else {
19062 assert(vlen == 16, "sanity");
19063 uint y_idx = ($idx$$constant >> 2) & 3;
19064 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19065 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19066 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19067 }
19068 %}
19069 ins_pipe( pipe_slow );
19070 %}
19071
19072 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19073 predicate(Matcher::vector_length(n) == 2);
19074 match(Set dst (VectorInsert (Binary dst val) idx));
19075 effect(TEMP tmp);
19076 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19077 ins_encode %{
19078 assert(UseSSE >= 4, "sanity");
19079 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19080 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19081
19082 __ movq($tmp$$Register, $val$$XMMRegister);
19083 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19084 %}
19085 ins_pipe( pipe_slow );
19086 %}
19087
19088 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19089 predicate(Matcher::vector_length(n) == 4);
19090 match(Set dst (VectorInsert (Binary src val) idx));
19091 effect(TEMP vtmp, TEMP tmp);
19092 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19093 ins_encode %{
19094 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19095 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19096
19097 uint x_idx = $idx$$constant & right_n_bits(1);
19098 uint y_idx = ($idx$$constant >> 1) & 1;
19100 __ movq($tmp$$Register, $val$$XMMRegister);
19101 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19102 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19103 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19104 %}
19105 ins_pipe( pipe_slow );
19106 %}
19107
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19109 predicate(Matcher::vector_length(n) == 8);
19110 match(Set dst (VectorInsert (Binary src val) idx));
19111 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19113 ins_encode %{
19114 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19115 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19116
19117 uint x_idx = $idx$$constant & right_n_bits(1);
19118 uint y_idx = ($idx$$constant >> 1) & 3;
19119 __ movq($tmp$$Register, $val$$XMMRegister);
19120 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19121 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19122 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19123 %}
19124 ins_pipe( pipe_slow );
19125 %}
19126
19127 // ====================REDUCTION ARITHMETIC=======================================
19128
19129 // =======================Int Reduction==========================================
19130
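// reduceI folds src2 roughly by repeated halving: the upper part is combined
// with the lower part (using vtmp1/vtmp2 as scratch) until a single element
// remains, which is then combined with the scalar src1 into dst.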
19131 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19132 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19133 match(Set dst (AddReductionVI src1 src2));
19134 match(Set dst (MulReductionVI src1 src2));
19135 match(Set dst (AndReductionV src1 src2));
19136 match(Set dst ( OrReductionV src1 src2));
19137 match(Set dst (XorReductionV src1 src2));
19138 match(Set dst (MinReductionV src1 src2));
19139 match(Set dst (MaxReductionV src1 src2));
19140 effect(TEMP vtmp1, TEMP vtmp2);
19141 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19142 ins_encode %{
19143 int opcode = this->ideal_Opcode();
19144 int vlen = Matcher::vector_length(this, $src2);
19145 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19146 %}
19147 ins_pipe( pipe_slow );
19148 %}
19149
19150 // =======================Long Reduction==========================================
19151
19152 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19153 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19154 match(Set dst (AddReductionVL src1 src2));
19155 match(Set dst (MulReductionVL src1 src2));
19156 match(Set dst (AndReductionV src1 src2));
19157 match(Set dst ( OrReductionV src1 src2));
19158 match(Set dst (XorReductionV src1 src2));
19159 match(Set dst (MinReductionV src1 src2));
19160 match(Set dst (MaxReductionV src1 src2));
19161 effect(TEMP vtmp1, TEMP vtmp2);
19162 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19163 ins_encode %{
19164 int opcode = this->ideal_Opcode();
19165 int vlen = Matcher::vector_length(this, $src2);
19166 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19167 %}
19168 ins_pipe( pipe_slow );
19169 %}
19170
19171 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19172 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19173 match(Set dst (AddReductionVL src1 src2));
19174 match(Set dst (MulReductionVL src1 src2));
19175 match(Set dst (AndReductionV src1 src2));
19176 match(Set dst ( OrReductionV src1 src2));
19177 match(Set dst (XorReductionV src1 src2));
19178 match(Set dst (MinReductionV src1 src2));
19179 match(Set dst (MaxReductionV src1 src2));
19180 effect(TEMP vtmp1, TEMP vtmp2);
19181 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19182 ins_encode %{
19183 int opcode = this->ideal_Opcode();
19184 int vlen = Matcher::vector_length(this, $src2);
19185 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19186 %}
19187 ins_pipe( pipe_slow );
19188 %}
19189
19190 // =======================Float Reduction==========================================
19191
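// Float add/mul reductions come in two flavors. Auto-vectorized Java code must
// keep the strictly ordered, sequential semantics (requires_strict_order()),
// because FP addition is not associative: in floats,
// (1.0e8f + -1.0e8f) + 1.0e-3f == 1.0e-3f, but
// 1.0e8f + (-1.0e8f + 1.0e-3f) == 0.0f, since the small addend is absorbed.
// The Vector API permits reassociation, which the unordered_reduction rules
// further down exploit.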
19192 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19193 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19194 match(Set dst (AddReductionVF dst src));
19195 match(Set dst (MulReductionVF dst src));
19196 effect(TEMP dst, TEMP vtmp);
19197 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19198 ins_encode %{
19199 int opcode = this->ideal_Opcode();
19200 int vlen = Matcher::vector_length(this, $src);
19201 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19202 %}
19203 ins_pipe( pipe_slow );
19204 %}
19205
19206 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19207 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19208 match(Set dst (AddReductionVF dst src));
19209 match(Set dst (MulReductionVF dst src));
19210 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19211 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19212 ins_encode %{
19213 int opcode = this->ideal_Opcode();
19214 int vlen = Matcher::vector_length(this, $src);
19215 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19216 %}
19217 ins_pipe( pipe_slow );
19218 %}
19219
19220 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19221 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19222 match(Set dst (AddReductionVF dst src));
19223 match(Set dst (MulReductionVF dst src));
19224 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19225 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19226 ins_encode %{
19227 int opcode = this->ideal_Opcode();
19228 int vlen = Matcher::vector_length(this, $src);
19229 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19230 %}
19231 ins_pipe( pipe_slow );
19232 %}
19233
19234
19235 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19236 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19237 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19238 // src1 contains reduction identity
19239 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19240 match(Set dst (AddReductionVF src1 src2));
19241 match(Set dst (MulReductionVF src1 src2));
19242 effect(TEMP dst);
19243 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19244 ins_encode %{
19245 int opcode = this->ideal_Opcode();
19246 int vlen = Matcher::vector_length(this, $src2);
19247 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19248 %}
19249 ins_pipe( pipe_slow );
19250 %}
19251
19252 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19253 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19254 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19255 // src1 contains reduction identity
19256 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19257 match(Set dst (AddReductionVF src1 src2));
19258 match(Set dst (MulReductionVF src1 src2));
19259 effect(TEMP dst, TEMP vtmp);
19260 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19261 ins_encode %{
19262 int opcode = this->ideal_Opcode();
19263 int vlen = Matcher::vector_length(this, $src2);
19264 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19265 %}
19266 ins_pipe( pipe_slow );
19267 %}
19268
19269 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19270 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19271 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19272 // src1 contains reduction identity
19273 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19274 match(Set dst (AddReductionVF src1 src2));
19275 match(Set dst (MulReductionVF src1 src2));
19276 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19277 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19278 ins_encode %{
19279 int opcode = this->ideal_Opcode();
19280 int vlen = Matcher::vector_length(this, $src2);
19281 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19282 %}
19283 ins_pipe( pipe_slow );
19284 %}
19285
19286 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19287 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19288 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19289 // src1 contains reduction identity
19290 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19291 match(Set dst (AddReductionVF src1 src2));
19292 match(Set dst (MulReductionVF src1 src2));
19293 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19294 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19295 ins_encode %{
19296 int opcode = this->ideal_Opcode();
19297 int vlen = Matcher::vector_length(this, $src2);
19298 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19299 %}
19300 ins_pipe( pipe_slow );
19301 %}
19302
19303 // =======================Double Reduction==========================================
19304
19305 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19306 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19307 match(Set dst (AddReductionVD dst src));
19308 match(Set dst (MulReductionVD dst src));
19309 effect(TEMP dst, TEMP vtmp);
19310 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19311 ins_encode %{
19312 int opcode = this->ideal_Opcode();
19313 int vlen = Matcher::vector_length(this, $src);
19314 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19315 %}
19316 ins_pipe( pipe_slow );
19317 %}
19318
19319 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19320 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19321 match(Set dst (AddReductionVD dst src));
19322 match(Set dst (MulReductionVD dst src));
19323 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19324 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19325 ins_encode %{
19326 int opcode = this->ideal_Opcode();
19327 int vlen = Matcher::vector_length(this, $src);
19328 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19329 %}
19330 ins_pipe( pipe_slow );
19331 %}
19332
19333 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19334 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19335 match(Set dst (AddReductionVD dst src));
19336 match(Set dst (MulReductionVD dst src));
19337 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19338 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19339 ins_encode %{
19340 int opcode = this->ideal_Opcode();
19341 int vlen = Matcher::vector_length(this, $src);
19342 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19343 %}
19344 ins_pipe( pipe_slow );
19345 %}
19346
19347 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19348 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19349 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19350 // src1 contains reduction identity
19351 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19352 match(Set dst (AddReductionVD src1 src2));
19353 match(Set dst (MulReductionVD src1 src2));
19354 effect(TEMP dst);
19355 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19356 ins_encode %{
19357 int opcode = this->ideal_Opcode();
19358 int vlen = Matcher::vector_length(this, $src2);
19359 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19360 %}
19361 ins_pipe( pipe_slow );
19362 %}
19363
19364 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19365 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19366 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19367 // src1 contains reduction identity
19368 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19369 match(Set dst (AddReductionVD src1 src2));
19370 match(Set dst (MulReductionVD src1 src2));
19371 effect(TEMP dst, TEMP vtmp);
19372 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19373 ins_encode %{
19374 int opcode = this->ideal_Opcode();
19375 int vlen = Matcher::vector_length(this, $src2);
19376 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19377 %}
19378 ins_pipe( pipe_slow );
19379 %}
19380
19381 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19382 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19383 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19384 // src1 contains reduction identity
19385 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19386 match(Set dst (AddReductionVD src1 src2));
19387 match(Set dst (MulReductionVD src1 src2));
19388 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19389 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19390 ins_encode %{
19391 int opcode = this->ideal_Opcode();
19392 int vlen = Matcher::vector_length(this, $src2);
19393 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19394 %}
19395 ins_pipe( pipe_slow );
19396 %}
19397
19398 // =======================Byte Reduction==========================================
19399
19400 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19401 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19402 match(Set dst (AddReductionVI src1 src2));
19403 match(Set dst (AndReductionV src1 src2));
19404 match(Set dst ( OrReductionV src1 src2));
19405 match(Set dst (XorReductionV src1 src2));
19406 match(Set dst (MinReductionV src1 src2));
19407 match(Set dst (MaxReductionV src1 src2));
19408 effect(TEMP vtmp1, TEMP vtmp2);
19409 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19410 ins_encode %{
19411 int opcode = this->ideal_Opcode();
19412 int vlen = Matcher::vector_length(this, $src2);
19413 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19414 %}
19415 ins_pipe( pipe_slow );
19416 %}
19417
19418 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19419 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19420 match(Set dst (AddReductionVI src1 src2));
19421 match(Set dst (AndReductionV src1 src2));
19422 match(Set dst ( OrReductionV src1 src2));
19423 match(Set dst (XorReductionV src1 src2));
19424 match(Set dst (MinReductionV src1 src2));
19425 match(Set dst (MaxReductionV src1 src2));
19426 effect(TEMP vtmp1, TEMP vtmp2);
19427 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19428 ins_encode %{
19429 int opcode = this->ideal_Opcode();
19430 int vlen = Matcher::vector_length(this, $src2);
19431 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19432 %}
19433 ins_pipe( pipe_slow );
19434 %}
19435
19436 // =======================Short Reduction==========================================
19437
19438 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19439 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19440 match(Set dst (AddReductionVI src1 src2));
19441 match(Set dst (MulReductionVI src1 src2));
19442 match(Set dst (AndReductionV src1 src2));
19443 match(Set dst ( OrReductionV src1 src2));
19444 match(Set dst (XorReductionV src1 src2));
19445 match(Set dst (MinReductionV src1 src2));
19446 match(Set dst (MaxReductionV src1 src2));
19447 effect(TEMP vtmp1, TEMP vtmp2);
19448 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19449 ins_encode %{
19450 int opcode = this->ideal_Opcode();
19451 int vlen = Matcher::vector_length(this, $src2);
19452 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19453 %}
19454 ins_pipe( pipe_slow );
19455 %}
19456
19457 // =======================Mul Reduction==========================================
19458
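// x86 has no packed byte multiply, so byte MulReductionVI gets dedicated rules
// here (mulreduceB presumably widens the bytes before folding); the remaining
// byte reductions are covered by reductionB above.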
19459 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19460 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19461 Matcher::vector_length(n->in(2)) <= 32); // src2
19462 match(Set dst (MulReductionVI src1 src2));
19463 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19464 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19465 ins_encode %{
19466 int opcode = this->ideal_Opcode();
19467 int vlen = Matcher::vector_length(this, $src2);
19468 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19469 %}
19470 ins_pipe( pipe_slow );
19471 %}
19472
19473 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19474 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19475 Matcher::vector_length(n->in(2)) == 64); // src2
19476 match(Set dst (MulReductionVI src1 src2));
19477 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19478 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19479 ins_encode %{
19480 int opcode = this->ideal_Opcode();
19481 int vlen = Matcher::vector_length(this, $src2);
19482 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19483 %}
19484 ins_pipe( pipe_slow );
19485 %}
19486
19487 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
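// Java's Math.min/max semantics (NaN propagates, min(-0.0f, 0.0f) == -0.0f) do
// not map directly onto minps/maxps, which return the second operand on NaN
// and do not order signed zeros. The pre-AVX10.2 rules therefore need
// blend-based fix-ups (atmp/btmp) and clobber the flags, while AVX10.2
// supplies IEEE 754-2019 minimum/maximum operations and gets by with plain
// temporaries.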
19489 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19490 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19491 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19492 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19493 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19494 Matcher::vector_length(n->in(2)) == 2);
19495 match(Set dst (MinReductionV src1 src2));
19496 match(Set dst (MaxReductionV src1 src2));
19497 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19498 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19499 ins_encode %{
19500 assert(UseAVX > 0, "sanity");
19501
19502 int opcode = this->ideal_Opcode();
19503 int vlen = Matcher::vector_length(this, $src2);
19504 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19505 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19506 %}
19507 ins_pipe( pipe_slow );
19508 %}
19509
19510 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19511 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19512 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19513 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19514 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19515 Matcher::vector_length(n->in(2)) >= 4);
19516 match(Set dst (MinReductionV src1 src2));
19517 match(Set dst (MaxReductionV src1 src2));
19518 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19519 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19520 ins_encode %{
19521 assert(UseAVX > 0, "sanity");
19522
19523 int opcode = this->ideal_Opcode();
19524 int vlen = Matcher::vector_length(this, $src2);
19525 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19526 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19527 %}
19528 ins_pipe( pipe_slow );
19529 %}
19530
19531 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19532 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19533 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19534 Matcher::vector_length(n->in(2)) == 2);
19535 match(Set dst (MinReductionV dst src));
19536 match(Set dst (MaxReductionV dst src));
19537 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19538 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19539 ins_encode %{
19540 assert(UseAVX > 0, "sanity");
19541
19542 int opcode = this->ideal_Opcode();
19543 int vlen = Matcher::vector_length(this, $src);
19544 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19545 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19546 %}
19547 ins_pipe( pipe_slow );
19548 %}
19549
19550
19551 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19552 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19553 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19554 Matcher::vector_length(n->in(2)) >= 4);
19555 match(Set dst (MinReductionV dst src));
19556 match(Set dst (MaxReductionV dst src));
19557 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19558 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19559 ins_encode %{
19560 assert(UseAVX > 0, "sanity");
19561
19562 int opcode = this->ideal_Opcode();
19563 int vlen = Matcher::vector_length(this, $src);
19564 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19565 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19566 %}
19567 ins_pipe( pipe_slow );
19568 %}
19569
19570 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19571 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19572 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19573 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19574 Matcher::vector_length(n->in(2)) == 2);
19575 match(Set dst (MinReductionV src1 src2));
19576 match(Set dst (MaxReductionV src1 src2));
19577 effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19579 ins_encode %{
19580 int opcode = this->ideal_Opcode();
19581 int vlen = Matcher::vector_length(this, $src2);
19582 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19583 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19584 %}
19585 ins_pipe( pipe_slow );
19586 %}
19587
19588 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19589 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19590 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19591 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19592 Matcher::vector_length(n->in(2)) >= 4);
19593 match(Set dst (MinReductionV src1 src2));
19594 match(Set dst (MaxReductionV src1 src2));
19595 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19597 ins_encode %{
19598 int opcode = this->ideal_Opcode();
19599 int vlen = Matcher::vector_length(this, $src2);
19600 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19601 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19602 %}
19603 ins_pipe( pipe_slow );
19604 %}
19605
19606 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19607 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19608 Matcher::vector_length(n->in(2)) == 2);
19609 match(Set dst (MinReductionV dst src));
19610 match(Set dst (MaxReductionV dst src));
19611 effect(TEMP dst, TEMP xtmp1);
19612 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19613 ins_encode %{
19614 int opcode = this->ideal_Opcode();
19615 int vlen = Matcher::vector_length(this, $src);
19616 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19617 $xtmp1$$XMMRegister);
19618 %}
19619 ins_pipe( pipe_slow );
19620 %}
19621
19622 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19623 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19624 Matcher::vector_length(n->in(2)) >= 4);
19625 match(Set dst (MinReductionV dst src));
19626 match(Set dst (MaxReductionV dst src));
19627 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19629 ins_encode %{
19630 int opcode = this->ideal_Opcode();
19631 int vlen = Matcher::vector_length(this, $src);
19632 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19633 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19634 %}
19635 ins_pipe( pipe_slow );
19636 %}
19637
//--------------------Min/Max Double Reduction --------------------
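// Same approach as the float min/max reductions above, applied to doubles.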
19639 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19640 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19641 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19642 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19643 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19644 Matcher::vector_length(n->in(2)) == 2);
19645 match(Set dst (MinReductionV src1 src2));
19646 match(Set dst (MaxReductionV src1 src2));
19647 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19648 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19649 ins_encode %{
19650 assert(UseAVX > 0, "sanity");
19651
19652 int opcode = this->ideal_Opcode();
19653 int vlen = Matcher::vector_length(this, $src2);
19654 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19655 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19656 %}
19657 ins_pipe( pipe_slow );
19658 %}
19659
19660 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19661 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19662 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19663 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19664 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19665 Matcher::vector_length(n->in(2)) >= 4);
19666 match(Set dst (MinReductionV src1 src2));
19667 match(Set dst (MaxReductionV src1 src2));
19668 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19669 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19670 ins_encode %{
19671 assert(UseAVX > 0, "sanity");
19672
19673 int opcode = this->ideal_Opcode();
19674 int vlen = Matcher::vector_length(this, $src2);
19675 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19676 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19677 %}
19678 ins_pipe( pipe_slow );
19679 %}
19680
19681
19682 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19683 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19684 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19685 Matcher::vector_length(n->in(2)) == 2);
19686 match(Set dst (MinReductionV dst src));
19687 match(Set dst (MaxReductionV dst src));
19688 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19689 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19690 ins_encode %{
19691 assert(UseAVX > 0, "sanity");
19692
19693 int opcode = this->ideal_Opcode();
19694 int vlen = Matcher::vector_length(this, $src);
19695 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19696 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19697 %}
19698 ins_pipe( pipe_slow );
19699 %}
19700
19701 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19702 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19703 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19704 Matcher::vector_length(n->in(2)) >= 4);
19705 match(Set dst (MinReductionV dst src));
19706 match(Set dst (MaxReductionV dst src));
19707 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19708 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19709 ins_encode %{
19710 assert(UseAVX > 0, "sanity");
19711
19712 int opcode = this->ideal_Opcode();
19713 int vlen = Matcher::vector_length(this, $src);
19714 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19715 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19716 %}
19717 ins_pipe( pipe_slow );
19718 %}
19719
19720 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19721 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19722 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19723 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19724 Matcher::vector_length(n->in(2)) == 2);
19725 match(Set dst (MinReductionV src1 src2));
19726 match(Set dst (MaxReductionV src1 src2));
19727 effect(TEMP dst, TEMP xtmp1);
19728 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19729 ins_encode %{
19730 int opcode = this->ideal_Opcode();
19731 int vlen = Matcher::vector_length(this, $src2);
19732 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19733 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19734 %}
19735 ins_pipe( pipe_slow );
19736 %}
19737
19738 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19739 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19740 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19741 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19742 Matcher::vector_length(n->in(2)) >= 4);
19743 match(Set dst (MinReductionV src1 src2));
19744 match(Set dst (MaxReductionV src1 src2));
19745 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19746 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19747 ins_encode %{
19748 int opcode = this->ideal_Opcode();
19749 int vlen = Matcher::vector_length(this, $src2);
19750 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19751 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19752 %}
19753 ins_pipe( pipe_slow );
19754 %}
19755
19756
19757 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19758 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19759 Matcher::vector_length(n->in(2)) == 2);
19760 match(Set dst (MinReductionV dst src));
19761 match(Set dst (MaxReductionV dst src));
19762 effect(TEMP dst, TEMP xtmp1);
19763 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19764 ins_encode %{
19765 int opcode = this->ideal_Opcode();
19766 int vlen = Matcher::vector_length(this, $src);
19767 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19768 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19769 %}
19770 ins_pipe( pipe_slow );
19771 %}
19772
19773 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19774 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19775 Matcher::vector_length(n->in(2)) >= 4);
19776 match(Set dst (MinReductionV dst src));
19777 match(Set dst (MaxReductionV dst src));
19778 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19779 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19780 ins_encode %{
19781 int opcode = this->ideal_Opcode();
19782 int vlen = Matcher::vector_length(this, $src);
19783 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19784 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19785 %}
19786 ins_pipe( pipe_slow );
19787 %}
19788
19789 // ====================VECTOR ARITHMETIC=======================================
19790
19791 // --------------------------------- ADD --------------------------------------
19792
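// Each packed add comes in three forms: a two-operand SSE rule (dst op= src),
// a three-operand AVX rule, and an AVX rule that folds the vector load into a
// memory operand. The memory forms require vectors wider than 8 bytes,
// presumably because an XMM memory operand always reads a full 16 bytes and
// could over-read past the end of a sub-XMM (8-byte) vector.
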
19793 // Bytes vector add
19794 instruct vaddB(vec dst, vec src) %{
19795 predicate(UseAVX == 0);
19796 match(Set dst (AddVB dst src));
19797 format %{ "paddb $dst,$src\t! add packedB" %}
19798 ins_encode %{
19799 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19800 %}
19801 ins_pipe( pipe_slow );
19802 %}
19803
19804 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19805 predicate(UseAVX > 0);
19806 match(Set dst (AddVB src1 src2));
19807 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19808 ins_encode %{
19809 int vlen_enc = vector_length_encoding(this);
19810 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19811 %}
19812 ins_pipe( pipe_slow );
19813 %}
19814
19815 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19816 predicate((UseAVX > 0) &&
19817 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19818 match(Set dst (AddVB src (LoadVector mem)));
19819 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19820 ins_encode %{
19821 int vlen_enc = vector_length_encoding(this);
19822 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19823 %}
19824 ins_pipe( pipe_slow );
19825 %}
19826
19827 // Shorts/Chars vector add
19828 instruct vaddS(vec dst, vec src) %{
19829 predicate(UseAVX == 0);
19830 match(Set dst (AddVS dst src));
19831 format %{ "paddw $dst,$src\t! add packedS" %}
19832 ins_encode %{
19833 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19834 %}
19835 ins_pipe( pipe_slow );
19836 %}
19837
19838 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19839 predicate(UseAVX > 0);
19840 match(Set dst (AddVS src1 src2));
19841 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19842 ins_encode %{
19843 int vlen_enc = vector_length_encoding(this);
19844 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19845 %}
19846 ins_pipe( pipe_slow );
19847 %}
19848
19849 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19850 predicate((UseAVX > 0) &&
19851 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19852 match(Set dst (AddVS src (LoadVector mem)));
19853 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19854 ins_encode %{
19855 int vlen_enc = vector_length_encoding(this);
19856 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19857 %}
19858 ins_pipe( pipe_slow );
19859 %}
19860
19861 // Integers vector add
19862 instruct vaddI(vec dst, vec src) %{
19863 predicate(UseAVX == 0);
19864 match(Set dst (AddVI dst src));
19865 format %{ "paddd $dst,$src\t! add packedI" %}
19866 ins_encode %{
19867 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19868 %}
19869 ins_pipe( pipe_slow );
19870 %}
19871
19872 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19873 predicate(UseAVX > 0);
19874 match(Set dst (AddVI src1 src2));
19875 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19876 ins_encode %{
19877 int vlen_enc = vector_length_encoding(this);
19878 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19879 %}
19880 ins_pipe( pipe_slow );
19881 %}
19882
19883
19884 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19885 predicate((UseAVX > 0) &&
19886 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19887 match(Set dst (AddVI src (LoadVector mem)));
19888 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19889 ins_encode %{
19890 int vlen_enc = vector_length_encoding(this);
19891 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19892 %}
19893 ins_pipe( pipe_slow );
19894 %}
19895
19896 // Longs vector add
19897 instruct vaddL(vec dst, vec src) %{
19898 predicate(UseAVX == 0);
19899 match(Set dst (AddVL dst src));
19900 format %{ "paddq $dst,$src\t! add packedL" %}
19901 ins_encode %{
19902 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19903 %}
19904 ins_pipe( pipe_slow );
19905 %}
19906
19907 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19908 predicate(UseAVX > 0);
19909 match(Set dst (AddVL src1 src2));
19910 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19911 ins_encode %{
19912 int vlen_enc = vector_length_encoding(this);
19913 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19914 %}
19915 ins_pipe( pipe_slow );
19916 %}
19917
19918 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19919 predicate((UseAVX > 0) &&
19920 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19921 match(Set dst (AddVL src (LoadVector mem)));
19922 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19923 ins_encode %{
19924 int vlen_enc = vector_length_encoding(this);
19925 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19926 %}
19927 ins_pipe( pipe_slow );
19928 %}
19929
19930 // Floats vector add
19931 instruct vaddF(vec dst, vec src) %{
19932 predicate(UseAVX == 0);
19933 match(Set dst (AddVF dst src));
19934 format %{ "addps $dst,$src\t! add packedF" %}
19935 ins_encode %{
19936 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19937 %}
19938 ins_pipe( pipe_slow );
19939 %}
19940
19941 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19942 predicate(UseAVX > 0);
19943 match(Set dst (AddVF src1 src2));
19944 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19945 ins_encode %{
19946 int vlen_enc = vector_length_encoding(this);
19947 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19948 %}
19949 ins_pipe( pipe_slow );
19950 %}
19951
19952 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19953 predicate((UseAVX > 0) &&
19954 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19955 match(Set dst (AddVF src (LoadVector mem)));
19956 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
19957 ins_encode %{
19958 int vlen_enc = vector_length_encoding(this);
19959 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19960 %}
19961 ins_pipe( pipe_slow );
19962 %}
19963
19964 // Doubles vector add
19965 instruct vaddD(vec dst, vec src) %{
19966 predicate(UseAVX == 0);
19967 match(Set dst (AddVD dst src));
19968 format %{ "addpd $dst,$src\t! add packedD" %}
19969 ins_encode %{
19970 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19971 %}
19972 ins_pipe( pipe_slow );
19973 %}
19974
19975 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19976 predicate(UseAVX > 0);
19977 match(Set dst (AddVD src1 src2));
19978 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
19979 ins_encode %{
19980 int vlen_enc = vector_length_encoding(this);
19981 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19982 %}
19983 ins_pipe( pipe_slow );
19984 %}
19985
19986 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19987 predicate((UseAVX > 0) &&
19988 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19989 match(Set dst (AddVD src (LoadVector mem)));
19990 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
19991 ins_encode %{
19992 int vlen_enc = vector_length_encoding(this);
19993 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19994 %}
19995 ins_pipe( pipe_slow );
19996 %}
19997
19998 // --------------------------------- SUB --------------------------------------
19999
20000 // Bytes vector sub
20001 instruct vsubB(vec dst, vec src) %{
20002 predicate(UseAVX == 0);
20003 match(Set dst (SubVB dst src));
20004 format %{ "psubb $dst,$src\t! sub packedB" %}
20005 ins_encode %{
20006 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20007 %}
20008 ins_pipe( pipe_slow );
20009 %}
20010
20011 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20012 predicate(UseAVX > 0);
20013 match(Set dst (SubVB src1 src2));
20014 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20015 ins_encode %{
20016 int vlen_enc = vector_length_encoding(this);
20017 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20018 %}
20019 ins_pipe( pipe_slow );
20020 %}
20021
20022 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20023 predicate((UseAVX > 0) &&
20024 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20025 match(Set dst (SubVB src (LoadVector mem)));
20026 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20027 ins_encode %{
20028 int vlen_enc = vector_length_encoding(this);
20029 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20030 %}
20031 ins_pipe( pipe_slow );
20032 %}
20033
20034 // Shorts/Chars vector sub
20035 instruct vsubS(vec dst, vec src) %{
20036 predicate(UseAVX == 0);
20037 match(Set dst (SubVS dst src));
20038 format %{ "psubw $dst,$src\t! sub packedS" %}
20039 ins_encode %{
20040 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20041 %}
20042 ins_pipe( pipe_slow );
20043 %}
20044
20045
20046 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20047 predicate(UseAVX > 0);
20048 match(Set dst (SubVS src1 src2));
20049 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20050 ins_encode %{
20051 int vlen_enc = vector_length_encoding(this);
20052 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20053 %}
20054 ins_pipe( pipe_slow );
20055 %}
20056
20057 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20058 predicate((UseAVX > 0) &&
20059 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20060 match(Set dst (SubVS src (LoadVector mem)));
20061 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20062 ins_encode %{
20063 int vlen_enc = vector_length_encoding(this);
20064 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20065 %}
20066 ins_pipe( pipe_slow );
20067 %}
20068
20069 // Integers vector sub
20070 instruct vsubI(vec dst, vec src) %{
20071 predicate(UseAVX == 0);
20072 match(Set dst (SubVI dst src));
20073 format %{ "psubd $dst,$src\t! sub packedI" %}
20074 ins_encode %{
20075 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20076 %}
20077 ins_pipe( pipe_slow );
20078 %}
20079
20080 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20081 predicate(UseAVX > 0);
20082 match(Set dst (SubVI src1 src2));
20083 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20084 ins_encode %{
20085 int vlen_enc = vector_length_encoding(this);
20086 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20087 %}
20088 ins_pipe( pipe_slow );
20089 %}
20090
20091 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20092 predicate((UseAVX > 0) &&
20093 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20094 match(Set dst (SubVI src (LoadVector mem)));
20095 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20096 ins_encode %{
20097 int vlen_enc = vector_length_encoding(this);
20098 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20099 %}
20100 ins_pipe( pipe_slow );
20101 %}
20102
20103 // Longs vector sub
20104 instruct vsubL(vec dst, vec src) %{
20105 predicate(UseAVX == 0);
20106 match(Set dst (SubVL dst src));
20107 format %{ "psubq $dst,$src\t! sub packedL" %}
20108 ins_encode %{
20109 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20110 %}
20111 ins_pipe( pipe_slow );
20112 %}
20113
20114 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20115 predicate(UseAVX > 0);
20116 match(Set dst (SubVL src1 src2));
20117 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20118 ins_encode %{
20119 int vlen_enc = vector_length_encoding(this);
20120 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20121 %}
20122 ins_pipe( pipe_slow );
20123 %}
20124
20125
20126 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20127 predicate((UseAVX > 0) &&
20128 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20129 match(Set dst (SubVL src (LoadVector mem)));
20130 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20131 ins_encode %{
20132 int vlen_enc = vector_length_encoding(this);
20133 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20134 %}
20135 ins_pipe( pipe_slow );
20136 %}
20137
20138 // Floats vector sub
20139 instruct vsubF(vec dst, vec src) %{
20140 predicate(UseAVX == 0);
20141 match(Set dst (SubVF dst src));
20142 format %{ "subps $dst,$src\t! sub packedF" %}
20143 ins_encode %{
20144 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20145 %}
20146 ins_pipe( pipe_slow );
20147 %}
20148
20149 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20150 predicate(UseAVX > 0);
20151 match(Set dst (SubVF src1 src2));
20152 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20153 ins_encode %{
20154 int vlen_enc = vector_length_encoding(this);
20155 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20156 %}
20157 ins_pipe( pipe_slow );
20158 %}
20159
20160 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20161 predicate((UseAVX > 0) &&
20162 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20163 match(Set dst (SubVF src (LoadVector mem)));
20164 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20165 ins_encode %{
20166 int vlen_enc = vector_length_encoding(this);
20167 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20168 %}
20169 ins_pipe( pipe_slow );
20170 %}
20171
20172 // Doubles vector sub
20173 instruct vsubD(vec dst, vec src) %{
20174 predicate(UseAVX == 0);
20175 match(Set dst (SubVD dst src));
20176 format %{ "subpd $dst,$src\t! sub packedD" %}
20177 ins_encode %{
20178 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20179 %}
20180 ins_pipe( pipe_slow );
20181 %}
20182
20183 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20184 predicate(UseAVX > 0);
20185 match(Set dst (SubVD src1 src2));
20186 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20187 ins_encode %{
20188 int vlen_enc = vector_length_encoding(this);
20189 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20190 %}
20191 ins_pipe( pipe_slow );
20192 %}
20193
20194 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20195 predicate((UseAVX > 0) &&
20196 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20197 match(Set dst (SubVD src (LoadVector mem)));
20198 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20199 ins_encode %{
20200 int vlen_enc = vector_length_encoding(this);
20201 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20202 %}
20203 ins_pipe( pipe_slow );
20204 %}
20205
20206 // --------------------------------- MUL --------------------------------------
20207
20208 // Byte vector mul
20209 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20210 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20211 match(Set dst (MulVB src1 src2));
20212 effect(TEMP dst, TEMP xtmp);
20213 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20214 ins_encode %{
20215 assert(UseSSE > 3, "required");
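    // There is no byte-wise multiply on x86: sign-extend both operands to
    // words, multiply in 16-bit lanes, then mask each product down to its
    // low byte and pack back to bytes.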
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
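    // Full-width case: multiply the odd and even byte lanes separately in
    // 16-bit lanes, then merge the two partial results with por.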
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
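// vpmullq is an AVX512DQ instruction; vectors shorter than 512 bits also
// need AVX512VL, which is what the predicates below check for.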
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
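    // 64x64-bit product from 32-bit halves (mod 2^64):
    //   a*b = ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo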
    // Get the lo-hi cross products; only their low 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their low 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

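// When C2 has proven that both long inputs are really 32-bit values
// (zero-extended for has_uint_inputs(), sign-extended for has_int_inputs()),
// a single 32x32->64-bit vpmuludq/vpmuldq computes the full product.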
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ MinMax ---------------------------------------

// Byte, Short, Int vector Min/Max
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector Min/Max
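// Note: the SSE flavor below pins its temporary to xmm0 (rxmm0) since the
// SSE4.1 blendv instructions use xmm0 as their implicit mask operand.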
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
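// Note: plain vminps/vmaxps do not match Java's Math.min/max semantics for
// NaN and for -0.0 vs +0.0, so the non-AVX10.2 paths below blend the
// operands explicitly through the temporaries.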
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Unsigned vector Min/Max ----------------------

instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is binary 11100100 = 0xE4
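// (Each row's result becomes bit (A<<2 | B<<1 | C) of the vpternlog immediate.)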
// ---------------------------------------

instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

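// BMI2 PEXT gathers the src bits selected by mask into the contiguous low
// bits of dst; PDEP scatters the contiguous low bits of src to the bit
// positions set in mask.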
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
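// (The packed shift instructions read the count from the low 64 bits of
// the xmm operand, so a single movdl serves both directions.)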
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
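    // There are no byte-wise shifts on x86: widen the bytes to words, shift
    // in 16-bit lanes, then mask off the high bytes and pack back down.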
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
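    // Shift the two 128-bit halves as widened words, repack, and use vpermq
    // to restore the original 64-bit lane order after the in-lane vpackuswb.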
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before shifting. But char vectors are fine since
// chars are unsigned values.
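// For example, with s = (short)-1, Java computes s >>> 3 as
// ((int)0xFFFFFFFF) >>> 3 = 0x1FFFFFFF, which narrows back to 0xFFFF,
// whereas a 16-bit psrlw would produce 0x1FFF.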
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
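    // There is no 64-bit arithmetic right shift before AVX-512 (evpsraq):
    // shift logically, then sign-extend via (x >>> s ^ m) - m,
    // where m = sign_mask >>> s.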
21270 uint vlen = Matcher::vector_length(this);
21271 if (vlen == 2) {
21272 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21273 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21274 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21275 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21276 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21277 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21278 } else {
21279 assert(vlen == 4, "sanity");
21280 assert(UseAVX > 1, "required");
21281 int vlen_enc = Assembler::AVX_256bit;
21282 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21283 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21284 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21285 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21286 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21287 }
21288 %}
21289 ins_pipe( pipe_slow );
21290 %}
21291
21292 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21293 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21294 match(Set dst (RShiftVL src shift));
21295 format %{ "vshiftq $dst,$src,$shift" %}
21296 ins_encode %{
21297 int vlen_enc = vector_length_encoding(this);
21298 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21299 %}
21300 ins_pipe( pipe_slow );
21301 %}
21302
21303 // ------------------- Variable Shift -----------------------------
21304 // Byte variable shift
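// Variable shifts take per-element shift counts from a second vector (the AVX2
// vpsllvd family). x86 has no byte-granular shifts at all, so byte elements are
// widened (varshiftbw), shifted with the dword variable-shift forms, and the
// intermediate word results are narrowed back to bytes with vpackuswb. Vectors wider
// than 8 bytes are processed in 8-byte halves (plus a 128-bit lane split in the 32B
// case) and the partial results merged at the end.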
21305 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21306 predicate(Matcher::vector_length(n) <= 8 &&
21307 n->as_ShiftV()->is_var_shift() &&
21308 !VM_Version::supports_avx512bw());
21309 match(Set dst ( LShiftVB src shift));
21310 match(Set dst ( RShiftVB src shift));
21311 match(Set dst (URShiftVB src shift));
21312 effect(TEMP dst, TEMP vtmp);
21313 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21314 ins_encode %{
21315 assert(UseAVX >= 2, "required");
21316
21317 int opcode = this->ideal_Opcode();
21318 int vlen_enc = Assembler::AVX_128bit;
21319 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21320 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21321 %}
21322 ins_pipe( pipe_slow );
21323 %}
21324
21325 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21326 predicate(Matcher::vector_length(n) == 16 &&
21327 n->as_ShiftV()->is_var_shift() &&
21328 !VM_Version::supports_avx512bw());
21329 match(Set dst ( LShiftVB src shift));
21330 match(Set dst ( RShiftVB src shift));
21331 match(Set dst (URShiftVB src shift));
21332 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21333 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21334 ins_encode %{
21335 assert(UseAVX >= 2, "required");
21336
21337 int opcode = this->ideal_Opcode();
21338 int vlen_enc = Assembler::AVX_128bit;
21339 // Shift lower half and get word result in dst
21340 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21341
21342 // Shift upper half and get word result in vtmp1
21343 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21344 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21345 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21346
21347 // Merge and down convert the two word results to byte in dst
21348 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21349 %}
21350 ins_pipe( pipe_slow );
21351 %}
21352
21353 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21354 predicate(Matcher::vector_length(n) == 32 &&
21355 n->as_ShiftV()->is_var_shift() &&
21356 !VM_Version::supports_avx512bw());
21357 match(Set dst ( LShiftVB src shift));
21358 match(Set dst ( RShiftVB src shift));
21359 match(Set dst (URShiftVB src shift));
21360 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21361 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21362 ins_encode %{
21363 assert(UseAVX >= 2, "required");
21364
21365 int opcode = this->ideal_Opcode();
21366 int vlen_enc = Assembler::AVX_128bit;
21367 // Process lower 128 bits and get result in dst
21368 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21369 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21370 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21371 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21372 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21373
21374 // Process higher 128 bits and get result in vtmp3
21375 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21376 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21377 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21378 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21379 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21380 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21381 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21382
21383 // Merge the two results in dst
21384 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21385 %}
21386 ins_pipe( pipe_slow );
21387 %}
21388
21389 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21390 predicate(Matcher::vector_length(n) <= 32 &&
21391 n->as_ShiftV()->is_var_shift() &&
21392 VM_Version::supports_avx512bw());
21393 match(Set dst ( LShiftVB src shift));
21394 match(Set dst ( RShiftVB src shift));
21395 match(Set dst (URShiftVB src shift));
21396 effect(TEMP dst, TEMP vtmp);
21397 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21398 ins_encode %{
21399 assert(UseAVX > 2, "required");
21400
21401 int opcode = this->ideal_Opcode();
21402 int vlen_enc = vector_length_encoding(this);
21403 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21404 %}
21405 ins_pipe( pipe_slow );
21406 %}
21407
21408 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21409 predicate(Matcher::vector_length(n) == 64 &&
21410 n->as_ShiftV()->is_var_shift() &&
21411 VM_Version::supports_avx512bw());
21412 match(Set dst ( LShiftVB src shift));
21413 match(Set dst ( RShiftVB src shift));
21414 match(Set dst (URShiftVB src shift));
21415 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21416 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21417 ins_encode %{
21418 assert(UseAVX > 2, "required");
21419
21420 int opcode = this->ideal_Opcode();
21421 int vlen_enc = Assembler::AVX_256bit;
21422 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21423 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21424 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21425 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21426 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21427 %}
21428 ins_pipe( pipe_slow );
21429 %}
21430
21431 // Short variable shift
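// Variable word shifts (vpsllvw/vpsravw/vpsrlvw) require AVX512BW. Without it the
// words are extended to dwords, shifted with the dword variable-shift forms, masked
// to the low 16 bits of each dword (so the unsigned saturation in vpackusdw is a
// no-op) and packed back to words. The 16S variant packs two 256-bit halves, so a
// final vpermq with 0xD8 restores the original lane order.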
21432 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21433 predicate(Matcher::vector_length(n) <= 8 &&
21434 n->as_ShiftV()->is_var_shift() &&
21435 !VM_Version::supports_avx512bw());
21436 match(Set dst ( LShiftVS src shift));
21437 match(Set dst ( RShiftVS src shift));
21438 match(Set dst (URShiftVS src shift));
21439 effect(TEMP dst, TEMP vtmp);
21440 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21441 ins_encode %{
21442 assert(UseAVX >= 2, "required");
21443
21444 int opcode = this->ideal_Opcode();
21445 bool sign = (opcode != Op_URShiftVS);
21446 int vlen_enc = Assembler::AVX_256bit;
21447 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21448 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21449 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21450 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21451 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21452 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21453 %}
21454 ins_pipe( pipe_slow );
21455 %}
21456
21457 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21458 predicate(Matcher::vector_length(n) == 16 &&
21459 n->as_ShiftV()->is_var_shift() &&
21460 !VM_Version::supports_avx512bw());
21461 match(Set dst ( LShiftVS src shift));
21462 match(Set dst ( RShiftVS src shift));
21463 match(Set dst (URShiftVS src shift));
21464 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21465 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21466 ins_encode %{
21467 assert(UseAVX >= 2, "required");
21468
21469 int opcode = this->ideal_Opcode();
21470 bool sign = (opcode != Op_URShiftVS);
21471 int vlen_enc = Assembler::AVX_256bit;
21472 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21473 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21474 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21475 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21476 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21477
21478 // Shift upper half, with result in dst using vtmp1 as TEMP
21479 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21480 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21481 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21482 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21483 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21484 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21485
21486 // Merge lower and upper half result into dst
21487 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21488 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21489 %}
21490 ins_pipe( pipe_slow );
21491 %}
21492
21493 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21494 predicate(n->as_ShiftV()->is_var_shift() &&
21495 VM_Version::supports_avx512bw());
21496 match(Set dst ( LShiftVS src shift));
21497 match(Set dst ( RShiftVS src shift));
21498 match(Set dst (URShiftVS src shift));
21499 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21500 ins_encode %{
21501 assert(UseAVX > 2, "required");
21502
21503 int opcode = this->ideal_Opcode();
21504 int vlen_enc = vector_length_encoding(this);
21505 if (!VM_Version::supports_avx512vl()) {
21506 vlen_enc = Assembler::AVX_512bit;
21507 }
21508 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21509 %}
21510 ins_pipe( pipe_slow );
21511 %}
21512
21513 // Integer variable shift
21514 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21515 predicate(n->as_ShiftV()->is_var_shift());
21516 match(Set dst ( LShiftVI src shift));
21517 match(Set dst ( RShiftVI src shift));
21518 match(Set dst (URShiftVI src shift));
21519 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21520 ins_encode %{
21521 assert(UseAVX >= 2, "required");
21522
21523 int opcode = this->ideal_Opcode();
21524 int vlen_enc = vector_length_encoding(this);
21525 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21526 %}
21527 ins_pipe( pipe_slow );
21528 %}
21529
21530 // Long variable shift
21531 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21532 predicate(n->as_ShiftV()->is_var_shift());
21533 match(Set dst ( LShiftVL src shift));
21534 match(Set dst (URShiftVL src shift));
21535 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21536 ins_encode %{
21537 assert(UseAVX >= 2, "required");
21538
21539 int opcode = this->ideal_Opcode();
21540 int vlen_enc = vector_length_encoding(this);
21541 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21542 %}
21543 ins_pipe( pipe_slow );
21544 %}
21545
21546 // Long variable arithmetic right shift
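// AVX2 has no vpsravq; the varshiftq helper emulates the variable arithmetic right
// shift with a sign fix-up in $vtmp. With AVX-512 (UseAVX > 2) evpsravq is available
// directly and no temporary is needed.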
21547 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21548 predicate(Matcher::vector_length(n) <= 4 &&
21549 n->as_ShiftV()->is_var_shift() &&
21550 UseAVX == 2);
21551 match(Set dst (RShiftVL src shift));
21552 effect(TEMP dst, TEMP vtmp);
21553 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21554 ins_encode %{
21555 int opcode = this->ideal_Opcode();
21556 int vlen_enc = vector_length_encoding(this);
21557 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21558 $vtmp$$XMMRegister);
21559 %}
21560 ins_pipe( pipe_slow );
21561 %}
21562
21563 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21564 predicate(n->as_ShiftV()->is_var_shift() &&
21565 UseAVX > 2);
21566 match(Set dst (RShiftVL src shift));
21567 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21568 ins_encode %{
21569 int opcode = this->ideal_Opcode();
21570 int vlen_enc = vector_length_encoding(this);
21571 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21572 %}
21573 ins_pipe( pipe_slow );
21574 %}
21575
21576 // --------------------------------- AND --------------------------------------
21577
21578 instruct vand(vec dst, vec src) %{
21579 predicate(UseAVX == 0);
21580 match(Set dst (AndV dst src));
21581 format %{ "pand $dst,$src\t! and vectors" %}
21582 ins_encode %{
21583 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21584 %}
21585 ins_pipe( pipe_slow );
21586 %}
21587
21588 instruct vand_reg(vec dst, vec src1, vec src2) %{
21589 predicate(UseAVX > 0);
21590 match(Set dst (AndV src1 src2));
21591 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21592 ins_encode %{
21593 int vlen_enc = vector_length_encoding(this);
21594 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21595 %}
21596 ins_pipe( pipe_slow );
21597 %}
21598
21599 instruct vand_mem(vec dst, vec src, memory mem) %{
21600 predicate((UseAVX > 0) &&
21601 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21602 match(Set dst (AndV src (LoadVector mem)));
21603 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21604 ins_encode %{
21605 int vlen_enc = vector_length_encoding(this);
21606 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21607 %}
21608 ins_pipe( pipe_slow );
21609 %}
21610
21611 // --------------------------------- OR ---------------------------------------
21612
21613 instruct vor(vec dst, vec src) %{
21614 predicate(UseAVX == 0);
21615 match(Set dst (OrV dst src));
21616 format %{ "por $dst,$src\t! or vectors" %}
21617 ins_encode %{
21618 __ por($dst$$XMMRegister, $src$$XMMRegister);
21619 %}
21620 ins_pipe( pipe_slow );
21621 %}
21622
21623 instruct vor_reg(vec dst, vec src1, vec src2) %{
21624 predicate(UseAVX > 0);
21625 match(Set dst (OrV src1 src2));
21626 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21627 ins_encode %{
21628 int vlen_enc = vector_length_encoding(this);
21629 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21630 %}
21631 ins_pipe( pipe_slow );
21632 %}
21633
21634 instruct vor_mem(vec dst, vec src, memory mem) %{
21635 predicate((UseAVX > 0) &&
21636 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21637 match(Set dst (OrV src (LoadVector mem)));
21638 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21639 ins_encode %{
21640 int vlen_enc = vector_length_encoding(this);
21641 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21642 %}
21643 ins_pipe( pipe_slow );
21644 %}
21645
21646 // --------------------------------- XOR --------------------------------------
21647
21648 instruct vxor(vec dst, vec src) %{
21649 predicate(UseAVX == 0);
21650 match(Set dst (XorV dst src));
21651 format %{ "pxor $dst,$src\t! xor vectors" %}
21652 ins_encode %{
21653 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21654 %}
21655 ins_pipe( pipe_slow );
21656 %}
21657
21658 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21659 predicate(UseAVX > 0);
21660 match(Set dst (XorV src1 src2));
21661 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21662 ins_encode %{
21663 int vlen_enc = vector_length_encoding(this);
21664 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21665 %}
21666 ins_pipe( pipe_slow );
21667 %}
21668
21669 instruct vxor_mem(vec dst, vec src, memory mem) %{
21670 predicate((UseAVX > 0) &&
21671 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21672 match(Set dst (XorV src (LoadVector mem)));
21673 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21674 ins_encode %{
21675 int vlen_enc = vector_length_encoding(this);
21676 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21677 %}
21678 ins_pipe( pipe_slow );
21679 %}
21680
21681 // --------------------------------- VectorCast --------------------------------------
21682
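// Widening casts sign-extend with vpmovsx* (converting further with vcvtdq2ps /
// vcvtdq2pd for FP targets). Narrowing casts use the AVX-512 down-converts
// (evpmovwb, evpmovdb, evpmovdw, evpmovqd, ...) when available; on AVX2 and below
// they are composed from a mask of the low bits followed by vpackusdw/vpackuswb.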
21683 instruct vcastBtoX(vec dst, vec src) %{
21684 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21685 match(Set dst (VectorCastB2X src));
21686 format %{ "vector_cast_b2x $dst,$src\t!" %}
21687 ins_encode %{
21688 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21689 int vlen_enc = vector_length_encoding(this);
21690 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21691 %}
21692 ins_pipe( pipe_slow );
21693 %}
21694
21695 instruct vcastBtoD(legVec dst, legVec src) %{
21696 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21697 match(Set dst (VectorCastB2X src));
21698 format %{ "vector_cast_b2x $dst,$src\t!" %}
21699 ins_encode %{
21700 int vlen_enc = vector_length_encoding(this);
21701 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21702 %}
21703 ins_pipe( pipe_slow );
21704 %}
21705
21706 instruct castStoX(vec dst, vec src) %{
21707 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21708 Matcher::vector_length(n->in(1)) <= 8 && // src
21709 Matcher::vector_element_basic_type(n) == T_BYTE);
21710 match(Set dst (VectorCastS2X src));
21711 format %{ "vector_cast_s2x $dst,$src" %}
21712 ins_encode %{
21713 assert(UseAVX > 0, "required");
21714
21715 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21716 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21717 %}
21718 ins_pipe( pipe_slow );
21719 %}
21720
21721 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21722 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21723 Matcher::vector_length(n->in(1)) == 16 && // src
21724 Matcher::vector_element_basic_type(n) == T_BYTE);
21725 effect(TEMP dst, TEMP vtmp);
21726 match(Set dst (VectorCastS2X src));
21727 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21728 ins_encode %{
21729 assert(UseAVX > 0, "required");
21730
21731 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21732 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21733 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21734 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21735 %}
21736 ins_pipe( pipe_slow );
21737 %}
21738
21739 instruct vcastStoX_evex(vec dst, vec src) %{
21740 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21741 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21742 match(Set dst (VectorCastS2X src));
21743 format %{ "vector_cast_s2x $dst,$src\t!" %}
21744 ins_encode %{
21745 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21746 int src_vlen_enc = vector_length_encoding(this, $src);
21747 int vlen_enc = vector_length_encoding(this);
21748 switch (to_elem_bt) {
21749 case T_BYTE:
21750 if (!VM_Version::supports_avx512vl()) {
21751 vlen_enc = Assembler::AVX_512bit;
21752 }
21753 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21754 break;
21755 case T_INT:
21756 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21757 break;
21758 case T_FLOAT:
21759 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21760 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21761 break;
21762 case T_LONG:
21763 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21764 break;
21765 case T_DOUBLE: {
21766 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21767 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21768 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21769 break;
21770 }
21771 default:
21772 ShouldNotReachHere();
21773 }
21774 %}
21775 ins_pipe( pipe_slow );
21776 %}
21777
21778 instruct castItoX(vec dst, vec src) %{
21779 predicate(UseAVX <= 2 &&
21780 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21781 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21782 match(Set dst (VectorCastI2X src));
21783 format %{ "vector_cast_i2x $dst,$src" %}
21784 ins_encode %{
21785 assert(UseAVX > 0, "required");
21786
21787 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21788 int vlen_enc = vector_length_encoding(this, $src);
21789
21790 if (to_elem_bt == T_BYTE) {
21791 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21792 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21793 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21794 } else {
21795 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21796 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21797 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21798 }
21799 %}
21800 ins_pipe( pipe_slow );
21801 %}
21802
21803 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21804 predicate(UseAVX <= 2 &&
21805 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21806 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21807 match(Set dst (VectorCastI2X src));
21808 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21809 effect(TEMP dst, TEMP vtmp);
21810 ins_encode %{
21811 assert(UseAVX > 0, "required");
21812
21813 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21814 int vlen_enc = vector_length_encoding(this, $src);
21815
21816 if (to_elem_bt == T_BYTE) {
21817 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21818 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21819 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21820 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21821 } else {
21822 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21823 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21824 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21825 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21826 }
21827 %}
21828 ins_pipe( pipe_slow );
21829 %}
21830
21831 instruct vcastItoX_evex(vec dst, vec src) %{
21832 predicate(UseAVX > 2 ||
21833 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21834 match(Set dst (VectorCastI2X src));
21835 format %{ "vector_cast_i2x $dst,$src\t!" %}
21836 ins_encode %{
21837 assert(UseAVX > 0, "required");
21838
21839 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21840 int src_vlen_enc = vector_length_encoding(this, $src);
21841 int dst_vlen_enc = vector_length_encoding(this);
21842 switch (dst_elem_bt) {
21843 case T_BYTE:
21844 if (!VM_Version::supports_avx512vl()) {
21845 src_vlen_enc = Assembler::AVX_512bit;
21846 }
21847 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21848 break;
21849 case T_SHORT:
21850 if (!VM_Version::supports_avx512vl()) {
21851 src_vlen_enc = Assembler::AVX_512bit;
21852 }
21853 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21854 break;
21855 case T_FLOAT:
21856 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21857 break;
21858 case T_LONG:
21859 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21860 break;
21861 case T_DOUBLE:
21862 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21863 break;
21864 default:
21865 ShouldNotReachHere();
21866 }
21867 %}
21868 ins_pipe( pipe_slow );
21869 %}
21870
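// L2B/L2S on AVX2: the low dword of each qword is gathered to the bottom of the
// register first (vpshufd/vpermpd with selector 8 == 0b00'00'10'00 picks elements
// {0,2} into positions {0,1}), after which the cast reduces to the I2B/I2S
// mask-and-pack sequence above.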
21871 instruct vcastLtoBS(vec dst, vec src) %{
21872 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21873 UseAVX <= 2);
21874 match(Set dst (VectorCastL2X src));
21875 format %{ "vector_cast_l2x $dst,$src" %}
21876 ins_encode %{
21877 assert(UseAVX > 0, "required");
21878
21879 int vlen = Matcher::vector_length_in_bytes(this, $src);
21880 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21881 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21882 : ExternalAddress(vector_int_to_short_mask());
21883 if (vlen <= 16) {
21884 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21885 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21886 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21887 } else {
21888 assert(vlen <= 32, "required");
21889 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21890 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21891 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21892 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21893 }
21894 if (to_elem_bt == T_BYTE) {
21895 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21896 }
21897 %}
21898 ins_pipe( pipe_slow );
21899 %}
21900
21901 instruct vcastLtoX_evex(vec dst, vec src) %{
21902 predicate(UseAVX > 2 ||
21903 (Matcher::vector_element_basic_type(n) == T_INT ||
21904 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21905 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21906 match(Set dst (VectorCastL2X src));
21907 format %{ "vector_cast_l2x $dst,$src\t!" %}
21908 ins_encode %{
21909 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21910 int vlen = Matcher::vector_length_in_bytes(this, $src);
21911 int vlen_enc = vector_length_encoding(this, $src);
21912 switch (to_elem_bt) {
21913 case T_BYTE:
21914 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21915 vlen_enc = Assembler::AVX_512bit;
21916 }
21917 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21918 break;
21919 case T_SHORT:
21920 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21921 vlen_enc = Assembler::AVX_512bit;
21922 }
21923 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21924 break;
21925 case T_INT:
21926 if (vlen == 8) {
21927 if ($dst$$XMMRegister != $src$$XMMRegister) {
21928 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21929 }
21930 } else if (vlen == 16) {
21931 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21932 } else if (vlen == 32) {
21933 if (UseAVX > 2) {
21934 if (!VM_Version::supports_avx512vl()) {
21935 vlen_enc = Assembler::AVX_512bit;
21936 }
21937 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21938 } else {
21939 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21940 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21941 }
21942 } else { // vlen == 64
21943 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21944 }
21945 break;
21946 case T_FLOAT:
21947 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21948 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21949 break;
21950 case T_DOUBLE:
21951 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21952 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21953 break;
21954
21955 default: assert(false, "%s", type2name(to_elem_bt));
21956 }
21957 %}
21958 ins_pipe( pipe_slow );
21959 %}
21960
21961 instruct vcastFtoD_reg(vec dst, vec src) %{
21962 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21963 match(Set dst (VectorCastF2X src));
21964 format %{ "vector_cast_f2d $dst,$src\t!" %}
21965 ins_encode %{
21966 int vlen_enc = vector_length_encoding(this);
21967 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21968 %}
21969 ins_pipe( pipe_slow );
21970 %}
21972
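// Java float->integral casts saturate (NaN -> 0, out-of-range values clamp to
// MIN/MAX), while cvttps2dq and friends return the integer indefinite value
// (e.g. 0x80000000) for such inputs. The vector_castF2X_* helpers post-process the
// raw conversion, using XMM temporaries (AVX) or mask registers (EVEX) to patch the
// special lanes; AVX10.2 provides saturating converts, so its rules need no temps.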
21973 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21974 predicate(!VM_Version::supports_avx10_2() &&
21975 !VM_Version::supports_avx512vl() &&
21976 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21977 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21978 is_integral_type(Matcher::vector_element_basic_type(n)));
21979 match(Set dst (VectorCastF2X src));
21980 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21981 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21982 ins_encode %{
21983 int vlen_enc = vector_length_encoding(this, $src);
21984 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21985 // JDK-8292878 removed the need for an explicit scratch register when loading
21986 // addresses wider than 32 bits in register-indirect addressing mode: stub constants
21987 // live in the code cache, and ReservedCodeCacheSize is currently capped at 2G.
21988 // Targets are free to raise that limit, but a code cache larger than 2G is
21989 // unreasonable in practice; on the flip side, keeping the cap saves a temporary
21990 // register allocation, which in the limiting case can prevent spilling in blocks
21991 // with high register pressure.
21992 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21993 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21994 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21995 %}
21996 ins_pipe( pipe_slow );
21997 %}
21998
21999 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22000 predicate(!VM_Version::supports_avx10_2() &&
22001 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22002 is_integral_type(Matcher::vector_element_basic_type(n)));
22003 match(Set dst (VectorCastF2X src));
22004 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22005 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22006 ins_encode %{
22007 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22008 if (to_elem_bt == T_LONG) {
22009 int vlen_enc = vector_length_encoding(this);
22010 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22011 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22012 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22013 } else {
22014 int vlen_enc = vector_length_encoding(this, $src);
22015 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22016 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22017 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22018 }
22019 %}
22020 ins_pipe( pipe_slow );
22021 %}
22022
22023 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22024 predicate(VM_Version::supports_avx10_2() &&
22025 is_integral_type(Matcher::vector_element_basic_type(n)));
22026 match(Set dst (VectorCastF2X src));
22027 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22028 ins_encode %{
22029 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22030 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22031 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22032 %}
22033 ins_pipe( pipe_slow );
22034 %}
22035
22036 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22037 predicate(VM_Version::supports_avx10_2() &&
22038 is_integral_type(Matcher::vector_element_basic_type(n)));
22039 match(Set dst (VectorCastF2X (LoadVector src)));
22040 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22041 ins_encode %{
22042 int vlen = Matcher::vector_length(this);
22043 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22044 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22045 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22046 %}
22047 ins_pipe( pipe_slow );
22048 %}
22049
22050 instruct vcastDtoF_reg(vec dst, vec src) %{
22051 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22052 match(Set dst (VectorCastD2X src));
22053 format %{ "vector_cast_d2x $dst,$src\t!" %}
22054 ins_encode %{
22055 int vlen_enc = vector_length_encoding(this, $src);
22056 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22057 %}
22058 ins_pipe( pipe_slow );
22059 %}
22060
22061 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22062 predicate(!VM_Version::supports_avx10_2() &&
22063 !VM_Version::supports_avx512vl() &&
22064 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22065 is_integral_type(Matcher::vector_element_basic_type(n)));
22066 match(Set dst (VectorCastD2X src));
22067 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22068 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22069 ins_encode %{
22070 int vlen_enc = vector_length_encoding(this, $src);
22071 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22072 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22073 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22074 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22075 %}
22076 ins_pipe( pipe_slow );
22077 %}
22078
22079 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22080 predicate(!VM_Version::supports_avx10_2() &&
22081 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22082 is_integral_type(Matcher::vector_element_basic_type(n)));
22083 match(Set dst (VectorCastD2X src));
22084 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22085 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22086 ins_encode %{
22087 int vlen_enc = vector_length_encoding(this, $src);
22088 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22089 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22090 ExternalAddress(vector_float_signflip());
22091 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22092 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22093 %}
22094 ins_pipe( pipe_slow );
22095 %}
22096
22097 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22098 predicate(VM_Version::supports_avx10_2() &&
22099 is_integral_type(Matcher::vector_element_basic_type(n)));
22100 match(Set dst (VectorCastD2X src));
22101 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22102 ins_encode %{
22103 int vlen_enc = vector_length_encoding(this, $src);
22104 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22105 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22106 %}
22107 ins_pipe( pipe_slow );
22108 %}
22109
22110 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22111 predicate(VM_Version::supports_avx10_2() &&
22112 is_integral_type(Matcher::vector_element_basic_type(n)));
22113 match(Set dst (VectorCastD2X (LoadVector src)));
22114 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22115 ins_encode %{
22116 int vlen = Matcher::vector_length(this);
22117 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22118 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22119 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22120 %}
22121 ins_pipe( pipe_slow );
22122 %}
22123
22124 instruct vucast(vec dst, vec src) %{
22125 match(Set dst (VectorUCastB2X src));
22126 match(Set dst (VectorUCastS2X src));
22127 match(Set dst (VectorUCastI2X src));
22128 format %{ "vector_ucast $dst,$src\t!" %}
22129 ins_encode %{
22130 assert(UseAVX > 0, "required");
22131
22132 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22133 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22134 int vlen_enc = vector_length_encoding(this);
22135 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22136 %}
22137 ins_pipe( pipe_slow );
22138 %}
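// RoundVF/RoundVD implement Math.round, i.e. floor(x + 0.5), which is not one of the
// four SSE rounding modes. The vector_round_* helpers temporarily install the MXCSR
// constant emitted here (0x3F80 selects round-toward-negative-infinity with all
// exceptions masked) and fix up NaN/overflow lanes against the sign-flip constant.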
22139
22140 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22141 predicate(!VM_Version::supports_avx512vl() &&
22142 Matcher::vector_length_in_bytes(n) < 64 &&
22143 Matcher::vector_element_basic_type(n) == T_INT);
22144 match(Set dst (RoundVF src));
22145 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22146 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22147 ins_encode %{
22148 int vlen_enc = vector_length_encoding(this);
22149 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22150 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22151 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22152 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22153 %}
22154 ins_pipe( pipe_slow );
22155 %}
22156
22157 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22158 predicate((VM_Version::supports_avx512vl() ||
22159 Matcher::vector_length_in_bytes(n) == 64) &&
22160 Matcher::vector_element_basic_type(n) == T_INT);
22161 match(Set dst (RoundVF src));
22162 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22163 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22164 ins_encode %{
22165 int vlen_enc = vector_length_encoding(this);
22166 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22167 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22168 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22169 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22170 %}
22171 ins_pipe( pipe_slow );
22172 %}
22173
22174 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22175 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22176 match(Set dst (RoundVD src));
22177 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22178 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22179 ins_encode %{
22180 int vlen_enc = vector_length_encoding(this);
22181 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22182 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22183 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22184 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22185 %}
22186 ins_pipe( pipe_slow );
22187 %}
22188
22189 // --------------------------------- VectorMaskCmp --------------------------------------
22190
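// Two mask representations are used: with a TypeVectMask bottom type (AVX-512) the
// result is a genuine k-register (the evcmp* rules below); otherwise it is an
// ordinary vector holding all-ones/all-zeroes per lane. The 64-byte forms compare
// into a k-register temporary and expand it with a masked evmovdqu load of the
// all-bits-set constant.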
22191 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22192 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22193 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22194 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22195 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22196 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22197 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22198 ins_encode %{
22199 int vlen_enc = vector_length_encoding(this, $src1);
22200 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22201 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22202 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22203 } else {
22204 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22205 }
22206 %}
22207 ins_pipe( pipe_slow );
22208 %}
22209
22210 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22211 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22212 n->bottom_type()->isa_vectmask() == nullptr &&
22213 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22214 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22215 effect(TEMP ktmp);
22216 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22217 ins_encode %{
22218 int vlen_enc = Assembler::AVX_512bit;
22219 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22220 KRegister mask = k0; // The comparison itself is not being masked.
22221 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22222 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22223 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22224 } else {
22225 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22226 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22227 }
22228 %}
22229 ins_pipe( pipe_slow );
22230 %}
22231
22232 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22233 predicate(n->bottom_type()->isa_vectmask() &&
22234 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22235 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22236 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22237 ins_encode %{
22238 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22239 int vlen_enc = vector_length_encoding(this, $src1);
22240 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22241 KRegister mask = k0; // The comparison itself is not being masked.
22242 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22243 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22244 } else {
22245 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22246 }
22247 %}
22248 ins_pipe( pipe_slow );
22249 %}
22250
22251 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22252 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22253 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22254 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22255 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22256 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22257 (n->in(2)->get_int() == BoolTest::eq ||
22258 n->in(2)->get_int() == BoolTest::lt ||
22259 n->in(2)->get_int() == BoolTest::gt)); // cond
22260 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22261 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22262 ins_encode %{
22263 int vlen_enc = vector_length_encoding(this, $src1);
22264 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22265 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22266 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22267 %}
22268 ins_pipe( pipe_slow );
22269 %}
22270
22271 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22272 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22273 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22274 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22275 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22276 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22277 (n->in(2)->get_int() == BoolTest::ne ||
22278 n->in(2)->get_int() == BoolTest::le ||
22279 n->in(2)->get_int() == BoolTest::ge)); // cond
22280 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22281 effect(TEMP dst, TEMP xtmp);
22282 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22283 ins_encode %{
22284 int vlen_enc = vector_length_encoding(this, $src1);
22285 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22286 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22287 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22288 %}
22289 ins_pipe( pipe_slow );
22290 %}
22291
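// Unsigned integer compares only exist as AVX-512 instructions (vpcmpu*). Below
// AVX-512 they are reduced to signed compares by flipping the sign bit of both
// operands:
//   x <u y  <==>  (x ^ SIGN_BIT) <s (y ^ SIGN_BIT)
// high_bit_set() supplies the per-element sign-bit pattern that is broadcast and
// xor-ed into both sources.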
22292 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22293 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22294 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22295 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22296 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22297 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22298 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22299 effect(TEMP dst, TEMP xtmp);
22300 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22301 ins_encode %{
22302 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22303 int vlen_enc = vector_length_encoding(this, $src1);
22304 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22305 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22306
22307 if (vlen_enc == Assembler::AVX_128bit) {
22308 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22309 } else {
22310 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22311 }
22312 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22313 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22314 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22315 %}
22316 ins_pipe( pipe_slow );
22317 %}
22318
22319 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22320 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22321 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22322 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22323 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22324 effect(TEMP ktmp);
22325 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22326 ins_encode %{
22327 assert(UseAVX > 2, "required");
22328
22329 int vlen_enc = vector_length_encoding(this, $src1);
22330 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22331 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22332 KRegister mask = k0; // The comparison itself is not being masked.
22333 bool merge = false;
22334 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22335
22336 switch (src1_elem_bt) {
22337 case T_INT: {
22338 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22339 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22340 break;
22341 }
22342 case T_LONG: {
22343 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22344 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22345 break;
22346 }
22347 default: assert(false, "%s", type2name(src1_elem_bt));
22348 }
22349 %}
22350 ins_pipe( pipe_slow );
22351 %}
22352
22354 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22355 predicate(n->bottom_type()->isa_vectmask() &&
22356 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22357 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22358 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22359 ins_encode %{
22360 assert(UseAVX > 2, "required");
22361 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22362
22363 int vlen_enc = vector_length_encoding(this, $src1);
22364 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22365 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22366 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22367
22368 // Dispatch on src1's element type to pick the right compare.
22369 switch (src1_elem_bt) {
22370 case T_BYTE: {
22371 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22372 break;
22373 }
22374 case T_SHORT: {
22375 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22376 break;
22377 }
22378 case T_INT: {
22379 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22380 break;
22381 }
22382 case T_LONG: {
22383 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22384 break;
22385 }
22386 default: assert(false, "%s", type2name(src1_elem_bt));
22387 }
22388 %}
22389 ins_pipe( pipe_slow );
22390 %}
22391
22392 // Extract
22393
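// For vectors up to 16 bytes get_elem() addresses the element directly. For wider
// vectors get_lane() first copies the 128-bit lane containing the element into a
// temporary, then get_elem() picks the element from within that lane.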
22394 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22395 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22396 match(Set dst (ExtractI src idx));
22397 match(Set dst (ExtractS src idx));
22398 match(Set dst (ExtractB src idx));
22399 format %{ "extractI $dst,$src,$idx\t!" %}
22400 ins_encode %{
22401 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22402
22403 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22404 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22405 %}
22406 ins_pipe( pipe_slow );
22407 %}
22408
22409 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22410 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22411 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22412 match(Set dst (ExtractI src idx));
22413 match(Set dst (ExtractS src idx));
22414 match(Set dst (ExtractB src idx));
22415 effect(TEMP vtmp);
22416 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22417 ins_encode %{
22418 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22419
22420 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22421 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22422 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22423 %}
22424 ins_pipe( pipe_slow );
22425 %}
22426
22427 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22428 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22429 match(Set dst (ExtractL src idx));
22430 format %{ "extractL $dst,$src,$idx\t!" %}
22431 ins_encode %{
22432 assert(UseSSE >= 4, "required");
22433 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22434
22435 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22436 %}
22437 ins_pipe( pipe_slow );
22438 %}
22439
22440 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22441 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22442 Matcher::vector_length(n->in(1)) == 8); // src
22443 match(Set dst (ExtractL src idx));
22444 effect(TEMP vtmp);
22445 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22446 ins_encode %{
22447 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22448
22449 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22450 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22451 %}
22452 ins_pipe( pipe_slow );
22453 %}
22454
22455 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22456 predicate(Matcher::vector_length(n->in(1)) <= 4);
22457 match(Set dst (ExtractF src idx));
22458 effect(TEMP dst, TEMP vtmp);
22459 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22460 ins_encode %{
22461 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22462
22463 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22464 %}
22465 ins_pipe( pipe_slow );
22466 %}
22467
22468 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22469 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22470 Matcher::vector_length(n->in(1)/*src*/) == 16);
22471 match(Set dst (ExtractF src idx));
22472 effect(TEMP vtmp);
22473 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22474 ins_encode %{
22475 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22476
22477 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22478 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22479 %}
22480 ins_pipe( pipe_slow );
22481 %}
22482
22483 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22484 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22485 match(Set dst (ExtractD src idx));
22486 format %{ "extractD $dst,$src,$idx\t!" %}
22487 ins_encode %{
22488 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22489
22490 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22491 %}
22492 ins_pipe( pipe_slow );
22493 %}
22494
22495 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22496 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22497 Matcher::vector_length(n->in(1)) == 8); // src
22498 match(Set dst (ExtractD src idx));
22499 effect(TEMP vtmp);
22500 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22501 ins_encode %{
22502 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22503
22504 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22505 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22506 %}
22507 ins_pipe( pipe_slow );
22508 %}
22509
22510 // --------------------------------- Vector Blend --------------------------------------
22511
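// VectorBlend selects between two sources under a per-element mask; in
// scalar terms (illustrative sketch):
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = mask[i] ? src2[i] : src1[i];
//   }
// The SSE4 pblendvb below consumes its mask implicitly through xmm0, which
// is why the pattern pins an rxmm0 temp and copies the mask into it.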
22512 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22513 predicate(UseAVX == 0);
22514 match(Set dst (VectorBlend (Binary dst src) mask));
22515 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22516 effect(TEMP tmp);
22517 ins_encode %{
22518 assert(UseSSE >= 4, "required");
22519
22520 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22521 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22522 }
22523 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22524 %}
22525 ins_pipe( pipe_slow );
22526 %}
22527
22528 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22529 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22530 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22531 Matcher::vector_length_in_bytes(n) <= 32 &&
22532 is_integral_type(Matcher::vector_element_basic_type(n)));
22533 match(Set dst (VectorBlend (Binary src1 src2) mask));
22534 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22535 ins_encode %{
22536 int vlen_enc = vector_length_encoding(this);
22537 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22538 %}
22539 ins_pipe( pipe_slow );
22540 %}
22541
22542 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22543 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22544 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22545 Matcher::vector_length_in_bytes(n) <= 32 &&
22546 !is_integral_type(Matcher::vector_element_basic_type(n)));
22547 match(Set dst (VectorBlend (Binary src1 src2) mask));
22548 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22549 ins_encode %{
22550 int vlen_enc = vector_length_encoding(this);
22551 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22552 %}
22553 ins_pipe( pipe_slow );
22554 %}
22555
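// On E-core targets vpblendvb is comparatively slow, so the variant below
// expands the blend into its boolean identity instead (a sketch of the
// three-instruction sequence):
//   dst = (mask & src2) | (~mask & src1);
// vpandn yields ~mask & src1, vpand yields mask & src2, and vpor merges.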
22556 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22557 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22558 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22559 Matcher::vector_length_in_bytes(n) <= 32);
22560 match(Set dst (VectorBlend (Binary src1 src2) mask));
22561 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22562 effect(TEMP vtmp, TEMP dst);
22563 ins_encode %{
22564 int vlen_enc = vector_length_encoding(this);
22565 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22566 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22567 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22568 %}
22569 ins_pipe( pipe_slow );
22570 %}
22571
22572 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22573 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22574 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22575 match(Set dst (VectorBlend (Binary src1 src2) mask));
22576   format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22577 effect(TEMP ktmp);
22578 ins_encode %{
22579 int vlen_enc = Assembler::AVX_512bit;
22580 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22581 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22582 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22583 %}
22584 ins_pipe( pipe_slow );
22585 %}
22586
22587
22588 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22589 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22590 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22591 VM_Version::supports_avx512bw()));
22592 match(Set dst (VectorBlend (Binary src1 src2) mask));
22593   format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22594 ins_encode %{
22595 int vlen_enc = vector_length_encoding(this);
22596 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22597 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22598 %}
22599 ins_pipe( pipe_slow );
22600 %}
22601
22602 // --------------------------------- ABS --------------------------------------
22603 // a = |a|
22604 instruct vabsB_reg(vec dst, vec src) %{
22605 match(Set dst (AbsVB src));
22606 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22607 ins_encode %{
22608 uint vlen = Matcher::vector_length(this);
22609 if (vlen <= 16) {
22610 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22611 } else {
22612 int vlen_enc = vector_length_encoding(this);
22613 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22614 }
22615 %}
22616 ins_pipe( pipe_slow );
22617 %}
22618
22619 instruct vabsS_reg(vec dst, vec src) %{
22620 match(Set dst (AbsVS src));
22621 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22622 ins_encode %{
22623 uint vlen = Matcher::vector_length(this);
22624 if (vlen <= 8) {
22625 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22626 } else {
22627 int vlen_enc = vector_length_encoding(this);
22628 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22629 }
22630 %}
22631 ins_pipe( pipe_slow );
22632 %}
22633
22634 instruct vabsI_reg(vec dst, vec src) %{
22635 match(Set dst (AbsVI src));
22636 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22637 ins_encode %{
22638 uint vlen = Matcher::vector_length(this);
22639 if (vlen <= 4) {
22640 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22641 } else {
22642 int vlen_enc = vector_length_encoding(this);
22643 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22644 }
22645 %}
22646 ins_pipe( pipe_slow );
22647 %}
22648
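// evpabsq is an EVEX-only instruction. When AVX512VL is unavailable the
// sub-512-bit operand cannot be encoded directly, so the pattern below
// widens the encoding to AVX_512bit; the extra upper lanes are treated as
// don't-care. The same widening idiom recurs in other EVEX-only patterns.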
22649 instruct vabsL_reg(vec dst, vec src) %{
22650 match(Set dst (AbsVL src));
22651 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22652 ins_encode %{
22653 assert(UseAVX > 2, "required");
22654 int vlen_enc = vector_length_encoding(this);
22655 if (!VM_Version::supports_avx512vl()) {
22656 vlen_enc = Assembler::AVX_512bit;
22657 }
22658 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22659 %}
22660 ins_pipe( pipe_slow );
22661 %}
22662
22663 // --------------------------------- ABSNEG --------------------------------------
22664
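// Floating-point abs/neg are pure sign-bit operations; a scalar sketch of
// what the packed mask constants (the "[mask]" in the formats) encode:
//   absF: bits &= 0x7FFFFFFF;           negF: bits ^= 0x80000000;
//   absD: bits &= 0x7FFFFFFFFFFFFFFFL;  negD: bits ^= 0x8000000000000000L;
// vabsnegf/vabsnegd choose the AND or XOR form from the ideal opcode.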
22665 instruct vabsnegF(vec dst, vec src) %{
22666 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22667 match(Set dst (AbsVF src));
22668 match(Set dst (NegVF src));
22669 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22670 ins_cost(150);
22671 ins_encode %{
22672 int opcode = this->ideal_Opcode();
22673 int vlen = Matcher::vector_length(this);
22674 if (vlen == 2) {
22675 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22676 } else {
22677 assert(vlen == 8 || vlen == 16, "required");
22678 int vlen_enc = vector_length_encoding(this);
22679 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22680 }
22681 %}
22682 ins_pipe( pipe_slow );
22683 %}
22684
22685 instruct vabsneg4F(vec dst) %{
22686 predicate(Matcher::vector_length(n) == 4);
22687 match(Set dst (AbsVF dst));
22688 match(Set dst (NegVF dst));
22689 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22690 ins_cost(150);
22691 ins_encode %{
22692 int opcode = this->ideal_Opcode();
22693 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22694 %}
22695 ins_pipe( pipe_slow );
22696 %}
22697
22698 instruct vabsnegD(vec dst, vec src) %{
22699 match(Set dst (AbsVD src));
22700 match(Set dst (NegVD src));
22701 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22702 ins_encode %{
22703 int opcode = this->ideal_Opcode();
22704 uint vlen = Matcher::vector_length(this);
22705 if (vlen == 2) {
22706 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22707 } else {
22708 int vlen_enc = vector_length_encoding(this);
22709 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22710 }
22711 %}
22712 ins_pipe( pipe_slow );
22713 %}
22714
22715 //------------------------------------- VectorTest --------------------------------------------
22716
22717 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22718 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22719 match(Set cr (VectorTest src1 src2));
22720 effect(TEMP vtmp);
22721 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22722 ins_encode %{
22723 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22724 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22725 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22726 %}
22727 ins_pipe( pipe_slow );
22728 %}
22729
22730 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22731 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22732 match(Set cr (VectorTest src1 src2));
22733   format %{ "vptest_ge16 $src1, $src2\t!" %}
22734 ins_encode %{
22735 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22736 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22737 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22738 %}
22739 ins_pipe( pipe_slow );
22740 %}
22741
22742 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22743 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22744 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22745 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22746 match(Set cr (VectorTest src1 src2));
22747 effect(TEMP tmp);
22748 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22749 ins_encode %{
22750 uint masklen = Matcher::vector_length(this, $src1);
22751 __ kmovwl($tmp$$Register, $src1$$KRegister);
22752 __ andl($tmp$$Register, (1 << masklen) - 1);
22753 __ cmpl($tmp$$Register, (1 << masklen) - 1);
22754 %}
22755 ins_pipe( pipe_slow );
22756 %}
22757
22758 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22759 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22760 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22761 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22762 match(Set cr (VectorTest src1 src2));
22763 effect(TEMP tmp);
22764 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22765 ins_encode %{
22766 uint masklen = Matcher::vector_length(this, $src1);
22767 __ kmovwl($tmp$$Register, $src1$$KRegister);
22768 __ andl($tmp$$Register, (1 << masklen) - 1);
22769 %}
22770 ins_pipe( pipe_slow );
22771 %}
22772
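// For masks of 8 or more elements the reduction relies on kortest's flag
// behavior: ZF is set when (src1 | src2) is all zeroes, CF when it is all
// ones. Passing the same mask twice therefore answers both questions at
// once: CF <=> alltrue (BoolTest::overflow), ZF <=> anytrue (BoolTest::ne).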
22773 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22774 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22775 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22776 match(Set cr (VectorTest src1 src2));
22777   format %{ "ktest_ge8 $src1, $src2\t!" %}
22778 ins_encode %{
22779 uint masklen = Matcher::vector_length(this, $src1);
22780 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22781 %}
22782 ins_pipe( pipe_slow );
22783 %}
22784
22785 //------------------------------------- LoadMask --------------------------------------------
22786
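// VectorLoadMask turns a vector of boolean bytes (0 or 1) into a vector
// mask: either all-ones/all-zeroes lanes of the element width, or one bit
// per lane in an opmask (k) register on AVX-512. In scalar terms
// (illustrative sketch): mask[i] = (src[i] != 0) ? ~0 : 0.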
22787 instruct loadMask(legVec dst, legVec src) %{
22788 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22789 match(Set dst (VectorLoadMask src));
22790 effect(TEMP dst);
22791   format %{ "vector_loadmask_byte $dst, $src\t!" %}
22792 ins_encode %{
22793 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22794 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22795 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22796 %}
22797 ins_pipe( pipe_slow );
22798 %}
22799
22800 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22801 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22802 match(Set dst (VectorLoadMask src));
22803 effect(TEMP xtmp);
22804 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22805 ins_encode %{
22806 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22807 true, Assembler::AVX_512bit);
22808 %}
22809 ins_pipe( pipe_slow );
22810 %}
22811
22812 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
22813 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22814 match(Set dst (VectorLoadMask src));
22815 effect(TEMP xtmp);
22816 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22817 ins_encode %{
22818 int vlen_enc = vector_length_encoding(in(1));
22819 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22820 false, vlen_enc);
22821 %}
22822 ins_pipe( pipe_slow );
22823 %}
22824
22825 //------------------------------------- StoreMask --------------------------------------------
22826
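// VectorStoreMask is the inverse of VectorLoadMask: it narrows -1/0 mask
// lanes of $size bytes back into a vector of boolean bytes holding 0 or 1.
// The trailing pabsb/vpabsb in each pattern maps a lane value of -1 to 1.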
22827 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22828 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22829 match(Set dst (VectorStoreMask src size));
22830 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22831 ins_encode %{
22832 int vlen = Matcher::vector_length(this);
22833 if (vlen <= 16 && UseAVX <= 2) {
22834 assert(UseSSE >= 3, "required");
22835 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22836 } else {
22837 assert(UseAVX > 0, "required");
22838 int src_vlen_enc = vector_length_encoding(this, $src);
22839 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22840 }
22841 %}
22842 ins_pipe( pipe_slow );
22843 %}
22844
22845 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22846 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22847 match(Set dst (VectorStoreMask src size));
22848 effect(TEMP_DEF dst, TEMP xtmp);
22849 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22850 ins_encode %{
22851 int vlen_enc = Assembler::AVX_128bit;
22852 int vlen = Matcher::vector_length(this);
22853 if (vlen <= 8) {
22854 assert(UseSSE >= 3, "required");
22855 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22856 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22857 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22858 } else {
22859 assert(UseAVX > 0, "required");
22860 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22861 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22862 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22863 }
22864 %}
22865 ins_pipe( pipe_slow );
22866 %}
22867
22868 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22869 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22870 match(Set dst (VectorStoreMask src size));
22871 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22872 effect(TEMP_DEF dst, TEMP xtmp);
22873 ins_encode %{
22874 int vlen_enc = Assembler::AVX_128bit;
22875 int vlen = Matcher::vector_length(this);
22876 if (vlen <= 4) {
22877 assert(UseSSE >= 3, "required");
22878 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22879 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22880 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22881 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22882 } else {
22883 assert(UseAVX > 0, "required");
22884 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22885 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22886 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22887 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22888 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22889 }
22890 %}
22891 ins_pipe( pipe_slow );
22892 %}
22893
22894 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22895 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22896 match(Set dst (VectorStoreMask src size));
22897 effect(TEMP_DEF dst, TEMP xtmp);
22898 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22899 ins_encode %{
22900 assert(UseSSE >= 3, "required");
22901 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22902 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22903 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22904 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22905 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22906 %}
22907 ins_pipe( pipe_slow );
22908 %}
22909
22910 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22911 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22912 match(Set dst (VectorStoreMask src size));
22913 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22914 effect(TEMP_DEF dst, TEMP vtmp);
22915 ins_encode %{
22916 int vlen_enc = Assembler::AVX_128bit;
22917 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22918 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22919 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22920 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22921 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22922 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22923 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22924 %}
22925 ins_pipe( pipe_slow );
22926 %}
22927
22928 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22929 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22930 match(Set dst (VectorStoreMask src size));
22931 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22932 ins_encode %{
22933 int src_vlen_enc = vector_length_encoding(this, $src);
22934 int dst_vlen_enc = vector_length_encoding(this);
22935 if (!VM_Version::supports_avx512vl()) {
22936 src_vlen_enc = Assembler::AVX_512bit;
22937 }
22938 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22939 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22940 %}
22941 ins_pipe( pipe_slow );
22942 %}
22943
22944 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22945 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22946 match(Set dst (VectorStoreMask src size));
22947 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22948 ins_encode %{
22949 int src_vlen_enc = vector_length_encoding(this, $src);
22950 int dst_vlen_enc = vector_length_encoding(this);
22951 if (!VM_Version::supports_avx512vl()) {
22952 src_vlen_enc = Assembler::AVX_512bit;
22953 }
22954 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22955 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22956 %}
22957 ins_pipe( pipe_slow );
22958 %}
22959
22960 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22961 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22962 match(Set dst (VectorStoreMask mask size));
22963 effect(TEMP_DEF dst);
22964 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22965 ins_encode %{
22966     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
22967 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22968 false, Assembler::AVX_512bit, noreg);
22969 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22970 %}
22971 ins_pipe( pipe_slow );
22972 %}
22973
22974 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22975 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22976 match(Set dst (VectorStoreMask mask size));
22977 effect(TEMP_DEF dst);
22978 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22979 ins_encode %{
22980 int dst_vlen_enc = vector_length_encoding(this);
22981 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22982 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22983 %}
22984 ins_pipe( pipe_slow );
22985 %}
22986
22987 instruct vmaskcast_evex(kReg dst) %{
22988 match(Set dst (VectorMaskCast dst));
22989 ins_cost(0);
22990 format %{ "vector_mask_cast $dst" %}
22991 ins_encode %{
22992 // empty
22993 %}
22994 ins_pipe(empty);
22995 %}
22996
22997 instruct vmaskcast(vec dst) %{
22998 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22999 match(Set dst (VectorMaskCast dst));
23000 ins_cost(0);
23001 format %{ "vector_mask_cast $dst" %}
23002 ins_encode %{
23003 // empty
23004 %}
23005 ins_pipe(empty);
23006 %}
23007
23008 instruct vmaskcast_avx(vec dst, vec src) %{
23009 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23010 match(Set dst (VectorMaskCast src));
23011 format %{ "vector_mask_cast $dst, $src" %}
23012 ins_encode %{
23013 int vlen = Matcher::vector_length(this);
23014 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23015 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23016 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23017 %}
23018 ins_pipe(pipe_slow);
23019 %}
23020
23021 //-------------------------------- Load Iota Indices ----------------------------------
23022
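// VectorLoadConst materializes the iota sequence from a constant table; in
// scalar terms dst[i] = i, so e.g. a 16-byte vector loads [0, 1, ..., 15].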
23023 instruct loadIotaIndices(vec dst, immI_0 src) %{
23024 match(Set dst (VectorLoadConst src));
23025 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23026 ins_encode %{
23027 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23028 BasicType bt = Matcher::vector_element_basic_type(this);
23029 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23030 %}
23031 ins_pipe( pipe_slow );
23032 %}
23033
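// PopulateIndex computes dst[i] = src1 + i * src2. Only a stride of one is
// matched (see the asserts below), so the expansion reduces to a broadcast
// of src1 plus the iota table: dst = broadcast(src1) + [0, 1, 2, ...].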
23034 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23035 match(Set dst (PopulateIndex src1 src2));
23036 effect(TEMP dst, TEMP vtmp);
23037 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23038 ins_encode %{
23039 assert($src2$$constant == 1, "required");
23040 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23041 int vlen_enc = vector_length_encoding(this);
23042 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23043 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23044 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23045 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23046 %}
23047 ins_pipe( pipe_slow );
23048 %}
23049
23050 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23051 match(Set dst (PopulateIndex src1 src2));
23052 effect(TEMP dst, TEMP vtmp);
23053 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23054 ins_encode %{
23055 assert($src2$$constant == 1, "required");
23056 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23057 int vlen_enc = vector_length_encoding(this);
23058 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23059 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23060 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23061 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23062 %}
23063 ins_pipe( pipe_slow );
23064 %}
23065
23066 //-------------------------------- Rearrange ----------------------------------
23067
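// VectorRearrange permutes elements under a shuffle vector; in scalar terms
// (illustrative sketch):
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src[shuffle[i]];
//   }
// The VectorLoadShuffle patterns convert the element-indexed shuffle into
// whatever index format the available permute instruction expects.
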
23068 // LoadShuffle/Rearrange for Byte
23069 instruct rearrangeB(vec dst, vec shuffle) %{
23070 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23071 Matcher::vector_length(n) < 32);
23072 match(Set dst (VectorRearrange dst shuffle));
23073 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23074 ins_encode %{
23075 assert(UseSSE >= 4, "required");
23076 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23077 %}
23078 ins_pipe( pipe_slow );
23079 %}
23080
23081 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23082 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23083 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23084 match(Set dst (VectorRearrange src shuffle));
23085 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23086 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23087 ins_encode %{
23088 assert(UseAVX >= 2, "required");
23089 // Swap src into vtmp1
23090 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23091     // Shuffle the swapped src to pick up entries from the other 128-bit lane
23092 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23093     // Shuffle the original src to pick up entries from its own 128-bit lane
23094 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23095     // Create a blend mask by setting the high bit for entries coming from the other lane
23096 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23097 // Perform the blend
23098 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23099 %}
23100 ins_pipe( pipe_slow );
23101 %}
23102
23103
23104 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23105 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23106 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23107 match(Set dst (VectorRearrange src shuffle));
23108 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23109   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23110 ins_encode %{
23111 int vlen_enc = vector_length_encoding(this);
23112 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23113 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23114 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23115 %}
23116 ins_pipe( pipe_slow );
23117 %}
23118
23119 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23120 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23121 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23122 match(Set dst (VectorRearrange src shuffle));
23123 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23124 ins_encode %{
23125 int vlen_enc = vector_length_encoding(this);
23126 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23127 %}
23128 ins_pipe( pipe_slow );
23129 %}
23130
23131 // LoadShuffle/Rearrange for Short
23132
23133 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23134 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23135 !VM_Version::supports_avx512bw());
23136 match(Set dst (VectorLoadShuffle src));
23137 effect(TEMP dst, TEMP vtmp);
23138 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23139 ins_encode %{
23140     // Create a byte shuffle mask from the short shuffle mask, since
23141     // only a byte shuffle instruction is available on these platforms.
23142 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23143 if (UseAVX == 0) {
23144 assert(vlen_in_bytes <= 16, "required");
23145 // Multiply each shuffle by two to get byte index
23146 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23147 __ psllw($vtmp$$XMMRegister, 1);
23148
23149 // Duplicate to create 2 copies of byte index
23150 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23151 __ psllw($dst$$XMMRegister, 8);
23152 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23153
23154 // Add one to get alternate byte index
23155 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23156 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23157 } else {
23158 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23159 int vlen_enc = vector_length_encoding(this);
23160 // Multiply each shuffle by two to get byte index
23161 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23162
23163 // Duplicate to create 2 copies of byte index
23164 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23165 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23166
23167 // Add one to get alternate byte index
23168 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23169 }
23170 %}
23171 ins_pipe( pipe_slow );
23172 %}
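
// Worked example of the expansion above: a short shuffle index s becomes
// the byte-index pair [2s, 2s+1]. For s = 3: 3 << 1 = 6, duplicated into
// both bytes to give [6, 6], then [0, 1] is added, yielding byte indices
// [6, 7], exactly the two bytes of element 3 that pshufb must move.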
23173
23174 instruct rearrangeS(vec dst, vec shuffle) %{
23175 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23176 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23177 match(Set dst (VectorRearrange dst shuffle));
23178 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23179 ins_encode %{
23180 assert(UseSSE >= 4, "required");
23181 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23182 %}
23183 ins_pipe( pipe_slow );
23184 %}
23185
23186 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23187 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23188 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23189 match(Set dst (VectorRearrange src shuffle));
23190 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23191 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23192 ins_encode %{
23193 assert(UseAVX >= 2, "required");
23194 // Swap src into vtmp1
23195 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23196     // Shuffle the swapped src to pick up entries from the other 128-bit lane
23197 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23198     // Shuffle the original src to pick up entries from its own 128-bit lane
23199 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23200     // Create a blend mask by setting the high bit for entries coming from the other lane
23201 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23202 // Perform the blend
23203 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23204 %}
23205 ins_pipe( pipe_slow );
23206 %}
23207
23208 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23209 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23210 VM_Version::supports_avx512bw());
23211 match(Set dst (VectorRearrange src shuffle));
23212 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23213 ins_encode %{
23214 int vlen_enc = vector_length_encoding(this);
23215 if (!VM_Version::supports_avx512vl()) {
23216 vlen_enc = Assembler::AVX_512bit;
23217 }
23218 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23219 %}
23220 ins_pipe( pipe_slow );
23221 %}
23222
23223 // LoadShuffle/Rearrange for Integer and Float
23224
23225 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23226 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23227 Matcher::vector_length(n) == 4 && UseAVX == 0);
23228 match(Set dst (VectorLoadShuffle src));
23229 effect(TEMP dst, TEMP vtmp);
23230 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23231 ins_encode %{
23232 assert(UseSSE >= 4, "required");
23233
23234     // Create a byte shuffle mask from the int shuffle mask, since
23235     // only a byte shuffle instruction is available on these platforms.
23236
23237 // Duplicate and multiply each shuffle by 4
23238 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23239 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23240 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23241 __ psllw($vtmp$$XMMRegister, 2);
23242
23243 // Duplicate again to create 4 copies of byte index
23244 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23245 __ psllw($dst$$XMMRegister, 8);
23246 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23247
23248     // Add 0,1,2,3 to get the alternate byte indices
23249 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23250 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23251 %}
23252 ins_pipe( pipe_slow );
23253 %}
23254
23255 instruct rearrangeI(vec dst, vec shuffle) %{
23256 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23257 UseAVX == 0);
23258 match(Set dst (VectorRearrange dst shuffle));
23259 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23260 ins_encode %{
23261 assert(UseSSE >= 4, "required");
23262 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23263 %}
23264 ins_pipe( pipe_slow );
23265 %}
23266
23267 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23268 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23269 UseAVX > 0);
23270 match(Set dst (VectorRearrange src shuffle));
23271 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23272 ins_encode %{
23273 int vlen_enc = vector_length_encoding(this);
23274 BasicType bt = Matcher::vector_element_basic_type(this);
23275 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23276 %}
23277 ins_pipe( pipe_slow );
23278 %}
23279
23280 // LoadShuffle/Rearrange for Long and Double
23281
23282 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23283 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23284 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23285 match(Set dst (VectorLoadShuffle src));
23286 effect(TEMP dst, TEMP vtmp);
23287 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23288 ins_encode %{
23289 assert(UseAVX >= 2, "required");
23290
23291 int vlen_enc = vector_length_encoding(this);
23292     // Create a double word shuffle mask from the long shuffle mask, since
23293     // only a double word shuffle instruction is available on these platforms.
23294
23295 // Multiply each shuffle by two to get double word index
23296 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23297
23298 // Duplicate each double word shuffle
23299 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23300 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23301
23302 // Add one to get alternate double word index
23303 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23304 %}
23305 ins_pipe( pipe_slow );
23306 %}
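
// Worked example: a long shuffle index s expands to the dword-index pair
// [2s, 2s+1] for vpermd, e.g. s = 1 becomes [2, 3].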
23307
23308 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23309 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23310 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23311 match(Set dst (VectorRearrange src shuffle));
23312 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23313 ins_encode %{
23314 assert(UseAVX >= 2, "required");
23315
23316 int vlen_enc = vector_length_encoding(this);
23317 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23318 %}
23319 ins_pipe( pipe_slow );
23320 %}
23321
23322 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23323 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23324 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23325 match(Set dst (VectorRearrange src shuffle));
23326 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23327 ins_encode %{
23328 assert(UseAVX > 2, "required");
23329
23330 int vlen_enc = vector_length_encoding(this);
23331 if (vlen_enc == Assembler::AVX_128bit) {
23332 vlen_enc = Assembler::AVX_256bit;
23333 }
23334 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23335 %}
23336 ins_pipe( pipe_slow );
23337 %}
23338
23339 // --------------------------------- FMA --------------------------------------
23340 // a * b + c
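// FmaV maps to the hardware fused multiply-add: a * b + c is computed with
// a single rounding step (no intermediate rounding of a * b), matching the
// semantics required by Math.fma. The _mem variants fold one vector load
// into the fma instruction itself.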
23341
23342 instruct vfmaF_reg(vec a, vec b, vec c) %{
23343 match(Set c (FmaVF c (Binary a b)));
23344 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23345 ins_cost(150);
23346 ins_encode %{
23347 assert(UseFMA, "not enabled");
23348 int vlen_enc = vector_length_encoding(this);
23349 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23350 %}
23351 ins_pipe( pipe_slow );
23352 %}
23353
23354 instruct vfmaF_mem(vec a, memory b, vec c) %{
23355 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23356 match(Set c (FmaVF c (Binary a (LoadVector b))));
23357 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23358 ins_cost(150);
23359 ins_encode %{
23360 assert(UseFMA, "not enabled");
23361 int vlen_enc = vector_length_encoding(this);
23362 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23363 %}
23364 ins_pipe( pipe_slow );
23365 %}
23366
23367 instruct vfmaD_reg(vec a, vec b, vec c) %{
23368 match(Set c (FmaVD c (Binary a b)));
23369 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23370 ins_cost(150);
23371 ins_encode %{
23372 assert(UseFMA, "not enabled");
23373 int vlen_enc = vector_length_encoding(this);
23374 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23375 %}
23376 ins_pipe( pipe_slow );
23377 %}
23378
23379 instruct vfmaD_mem(vec a, memory b, vec c) %{
23380 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23381 match(Set c (FmaVD c (Binary a (LoadVector b))));
23382 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23383 ins_cost(150);
23384 ins_encode %{
23385 assert(UseFMA, "not enabled");
23386 int vlen_enc = vector_length_encoding(this);
23387 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23388 %}
23389 ins_pipe( pipe_slow );
23390 %}
23391
23392 // --------------------------------- Vector Multiply Add --------------------------------------
23393
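// MulAddVS2VI multiplies adjacent signed short pairs and sums each pair
// into an int lane; in scalar terms (what pmaddwd computes):
//   dst[i] = src1[2*i] * src2[2*i] + src1[2*i + 1] * src2[2*i + 1];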
23394 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23395 predicate(UseAVX == 0);
23396 match(Set dst (MulAddVS2VI dst src1));
23397 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23398 ins_encode %{
23399 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23400 %}
23401 ins_pipe( pipe_slow );
23402 %}
23403
23404 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23405 predicate(UseAVX > 0);
23406 match(Set dst (MulAddVS2VI src1 src2));
23407 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23408 ins_encode %{
23409 int vlen_enc = vector_length_encoding(this);
23410 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23411 %}
23412 ins_pipe( pipe_slow );
23413 %}
23414
23415 // --------------------------------- Vector Multiply Add Add ----------------------------------
23416
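// With AVX512_VNNI the multiply-add/accumulate pair is fused into
// evpdpwssd, a word dot-product with accumulation; in scalar terms:
//   dst[i] += src1[2*i] * src2[2*i] + src1[2*i + 1] * src2[2*i + 1];
// The low ins_cost below biases the matcher toward this fused form.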
23417 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23418 predicate(VM_Version::supports_avx512_vnni());
23419 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23420 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23421 ins_encode %{
23422 assert(UseAVX > 2, "required");
23423 int vlen_enc = vector_length_encoding(this);
23424 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23425 %}
23426 ins_pipe( pipe_slow );
23427 ins_cost(10);
23428 %}
23429
23430 // --------------------------------- PopCount --------------------------------------
23431
23432 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23433 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23434 match(Set dst (PopCountVI src));
23435 match(Set dst (PopCountVL src));
23436 format %{ "vector_popcount_integral $dst, $src" %}
23437 ins_encode %{
23438 int opcode = this->ideal_Opcode();
23439 int vlen_enc = vector_length_encoding(this, $src);
23440 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23441 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23442 %}
23443 ins_pipe( pipe_slow );
23444 %}
23445
23446 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23447 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23448 match(Set dst (PopCountVI src mask));
23449 match(Set dst (PopCountVL src mask));
23450 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23451 ins_encode %{
23452 int vlen_enc = vector_length_encoding(this, $src);
23453 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23454 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23455 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23456 %}
23457 ins_pipe( pipe_slow );
23458 %}
23459
23460 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23461 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23462 match(Set dst (PopCountVI src));
23463 match(Set dst (PopCountVL src));
23464 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23465 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23466 ins_encode %{
23467 int opcode = this->ideal_Opcode();
23468 int vlen_enc = vector_length_encoding(this, $src);
23469 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23470 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23471 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23472 %}
23473 ins_pipe( pipe_slow );
23474 %}
23475
23476 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23477
23478 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23479 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23480 Matcher::vector_length_in_bytes(n->in(1))));
23481 match(Set dst (CountTrailingZerosV src));
23482 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23483 ins_cost(400);
23484   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23485 ins_encode %{
23486 int vlen_enc = vector_length_encoding(this, $src);
23487 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23488 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23489 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23490 %}
23491 ins_pipe( pipe_slow );
23492 %}
23493
23494 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23495 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23496 VM_Version::supports_avx512cd() &&
23497 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23498 match(Set dst (CountTrailingZerosV src));
23499 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23500 ins_cost(400);
23501   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23502 ins_encode %{
23503 int vlen_enc = vector_length_encoding(this, $src);
23504 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23505 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23506 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23507 %}
23508 ins_pipe( pipe_slow );
23509 %}
23510
23511 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23512 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23513 match(Set dst (CountTrailingZerosV src));
23514 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23515 ins_cost(400);
23516   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23517 ins_encode %{
23518 int vlen_enc = vector_length_encoding(this, $src);
23519 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23520 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23521 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23522 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23523 %}
23524 ins_pipe( pipe_slow );
23525 %}
23526
23527 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23528 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23529 match(Set dst (CountTrailingZerosV src));
23530 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23531 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23532 ins_encode %{
23533 int vlen_enc = vector_length_encoding(this, $src);
23534 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23535 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23536 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23537 %}
23538 ins_pipe( pipe_slow );
23539 %}
23540
23541
23542 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23543
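// vpternlogd evaluates an arbitrary three-input boolean function, encoded
// as an 8-bit truth table: for every bit position,
//   result_bit = (func >> ((dst_bit << 2) | (src2_bit << 1) | src3_bit)) & 1;
// For example func = 0x96 computes dst ^ src2 ^ src3 (a three-way XOR).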
23544 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23545 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23546 effect(TEMP dst);
23547 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23548 ins_encode %{
23549 int vector_len = vector_length_encoding(this);
23550 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23551 %}
23552 ins_pipe( pipe_slow );
23553 %}
23554
23555 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23556 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23557 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23558 effect(TEMP dst);
23559 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23560 ins_encode %{
23561 int vector_len = vector_length_encoding(this);
23562 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23563 %}
23564 ins_pipe( pipe_slow );
23565 %}
23566
23567 // --------------------------------- Rotation Operations ----------------------------------
23568 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23569 match(Set dst (RotateLeftV src shift));
23570 match(Set dst (RotateRightV src shift));
23571 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23572 ins_encode %{
23573 int opcode = this->ideal_Opcode();
23574 int vector_len = vector_length_encoding(this);
23575 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23576 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23577 %}
23578 ins_pipe( pipe_slow );
23579 %}
23580
23581 instruct vprorate(vec dst, vec src, vec shift) %{
23582 match(Set dst (RotateLeftV src shift));
23583 match(Set dst (RotateRightV src shift));
23584 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23585 ins_encode %{
23586 int opcode = this->ideal_Opcode();
23587 int vector_len = vector_length_encoding(this);
23588 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23589 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23590 %}
23591 ins_pipe( pipe_slow );
23592 %}
23593
23594 // ---------------------------------- Masked Operations ------------------------------------
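// Masked loads and stores move only the lanes selected by the mask; in
// scalar terms (illustrative sketch):
//   load:  dst[i] = mask[i] ? mem[i] : 0;   // unselected lanes are zeroed
//   store: if (mask[i]) mem[i] = src[i];    // unselected lanes untouched
// AVX variants take the mask in a vector register, EVEX variants in an
// opmask (k) register.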
23595 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23596 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23597 match(Set dst (LoadVectorMasked mem mask));
23598 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23599 ins_encode %{
23600 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23601 int vlen_enc = vector_length_encoding(this);
23602 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23603 %}
23604 ins_pipe( pipe_slow );
23605 %}
23606
23607
23608 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23609 predicate(n->in(3)->bottom_type()->isa_vectmask());
23610 match(Set dst (LoadVectorMasked mem mask));
23611 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23612 ins_encode %{
23613 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23614 int vector_len = vector_length_encoding(this);
23615 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23616 %}
23617 ins_pipe( pipe_slow );
23618 %}
23619
23620 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23621 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23622 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23623 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23624 ins_encode %{
23625 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23626 int vlen_enc = vector_length_encoding(src_node);
23627 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23628 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23629 %}
23630 ins_pipe( pipe_slow );
23631 %}
23632
23633 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23634 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23635 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23636 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23637 ins_encode %{
23638 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23639 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23640 int vlen_enc = vector_length_encoding(src_node);
23641 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23642 %}
23643 ins_pipe( pipe_slow );
23644 %}
23645
23646 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23647 match(Set addr (VerifyVectorAlignment addr mask));
23648 effect(KILL cr);
23649 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23650 ins_encode %{
23651 Label Lskip;
23652 // check if masked bits of addr are zero
23653 __ testq($addr$$Register, $mask$$constant);
23654 __ jccb(Assembler::equal, Lskip);
23655 __ stop("verify_vector_alignment found a misaligned vector memory access");
23656 __ bind(Lskip);
23657 %}
23658 ins_pipe(pipe_slow);
23659 %}
23660
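// VectorCmpMasked compares the selected lanes of src1 and src2 and returns
// the index of the first mismatch, or -1 when all selected lanes match.
// Sketch of the flow below, assuming mask is a prefix mask as produced by
// VectorMaskGen:
//   ktmp2 = ~mask;  ktmp1 = (src1 == src2) & mask;
//   if ((ktmp2 | ktmp1) == all-ones)  dst = -1;            // kortest sets CF
//   else                              dst = tzcnt(~ktmp1); // first mismatch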
23661 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23662 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23663 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23664 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23665 ins_encode %{
23666 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23667 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23668
23669 Label DONE;
23670 int vlen_enc = vector_length_encoding(this, $src1);
23671 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23672
23673 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23674 __ mov64($dst$$Register, -1L);
23675 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23676 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23677 __ jccb(Assembler::carrySet, DONE);
23678 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23679 __ notq($dst$$Register);
23680 __ tzcntq($dst$$Register, $dst$$Register);
23681 __ bind(DONE);
23682 %}
23683 ins_pipe( pipe_slow );
23684 %}
23685
23686
23687 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23688 match(Set dst (VectorMaskGen len));
23689 effect(TEMP temp, KILL cr);
23690   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23691 ins_encode %{
23692 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23693 %}
23694 ins_pipe( pipe_slow );
23695 %}
23696
23697 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23698 match(Set dst (VectorMaskGen len));
23699   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23700 effect(TEMP temp);
23701 ins_encode %{
23702     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23703 __ kmovql($dst$$KRegister, $temp$$Register);
23704 %}
23705 ins_pipe( pipe_slow );
23706 %}
23707
23708 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23709 predicate(n->in(1)->bottom_type()->isa_vectmask());
23710 match(Set dst (VectorMaskToLong mask));
23711 effect(TEMP dst, KILL cr);
23712 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23713 ins_encode %{
23714 int opcode = this->ideal_Opcode();
23715 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23716 int mask_len = Matcher::vector_length(this, $mask);
23717 int mask_size = mask_len * type2aelembytes(mbt);
23718 int vlen_enc = vector_length_encoding(this, $mask);
23719 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23720 $dst$$Register, mask_len, mask_size, vlen_enc);
23721 %}
23722 ins_pipe( pipe_slow );
23723 %}
23724
23725 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23726 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23727 match(Set dst (VectorMaskToLong mask));
23728 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23729 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23730 ins_encode %{
23731 int opcode = this->ideal_Opcode();
23732 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23733 int mask_len = Matcher::vector_length(this, $mask);
23734 int vlen_enc = vector_length_encoding(this, $mask);
23735 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23736 $dst$$Register, mask_len, mbt, vlen_enc);
23737 %}
23738 ins_pipe( pipe_slow );
23739 %}
23740
23741 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23742 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23743 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23744 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23745 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23746 ins_encode %{
23747 int opcode = this->ideal_Opcode();
23748 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23749 int mask_len = Matcher::vector_length(this, $mask);
23750 int vlen_enc = vector_length_encoding(this, $mask);
23751 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23752 $dst$$Register, mask_len, mbt, vlen_enc);
23753 %}
23754 ins_pipe( pipe_slow );
23755 %}
23756
23757 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23758 predicate(n->in(1)->bottom_type()->isa_vectmask());
23759 match(Set dst (VectorMaskTrueCount mask));
23760 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23761 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23762 ins_encode %{
23763 int opcode = this->ideal_Opcode();
23764 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23765 int mask_len = Matcher::vector_length(this, $mask);
23766 int mask_size = mask_len * type2aelembytes(mbt);
23767 int vlen_enc = vector_length_encoding(this, $mask);
23768 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23769 $tmp$$Register, mask_len, mask_size, vlen_enc);
23770 %}
23771 ins_pipe( pipe_slow );
23772 %}
23773
23774 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23775 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23776 match(Set dst (VectorMaskTrueCount mask));
23777 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23778 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23779 ins_encode %{
23780 int opcode = this->ideal_Opcode();
23781 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23782 int mask_len = Matcher::vector_length(this, $mask);
23783 int vlen_enc = vector_length_encoding(this, $mask);
23784 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23785 $tmp$$Register, mask_len, mbt, vlen_enc);
23786 %}
23787 ins_pipe( pipe_slow );
23788 %}
23789
23790 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23791 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23792 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23793 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23794 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23795 ins_encode %{
23796 int opcode = this->ideal_Opcode();
23797 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23798 int mask_len = Matcher::vector_length(this, $mask);
23799 int vlen_enc = vector_length_encoding(this, $mask);
23800 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23801 $tmp$$Register, mask_len, mbt, vlen_enc);
23802 %}
23803 ins_pipe( pipe_slow );
23804 %}
23805
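// VectorMaskFirstTrue and VectorMaskLastTrue share one pattern per mask
// representation; the common encoding dispatches on ideal_Opcode().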
23806 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23807 predicate(n->in(1)->bottom_type()->isa_vectmask());
23808 match(Set dst (VectorMaskFirstTrue mask));
23809 match(Set dst (VectorMaskLastTrue mask));
23810 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23811 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23812 ins_encode %{
23813 int opcode = this->ideal_Opcode();
23814 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23815 int mask_len = Matcher::vector_length(this, $mask);
23816 int mask_size = mask_len * type2aelembytes(mbt);
23817 int vlen_enc = vector_length_encoding(this, $mask);
23818 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23819 $tmp$$Register, mask_len, mask_size, vlen_enc);
23820 %}
23821 ins_pipe( pipe_slow );
23822 %}
23823
23824 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23825 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23826 match(Set dst (VectorMaskFirstTrue mask));
23827 match(Set dst (VectorMaskLastTrue mask));
23828 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23829 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23830 ins_encode %{
23831 int opcode = this->ideal_Opcode();
23832 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23833 int mask_len = Matcher::vector_length(this, $mask);
23834 int vlen_enc = vector_length_encoding(this, $mask);
23835 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23836 $tmp$$Register, mask_len, mbt, vlen_enc);
23837 %}
23838 ins_pipe( pipe_slow );
23839 %}
23840
23841 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23842 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23843 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23844 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23845 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23846 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23847 ins_encode %{
23848 int opcode = this->ideal_Opcode();
23849 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23850 int mask_len = Matcher::vector_length(this, $mask);
23851 int vlen_enc = vector_length_encoding(this, $mask);
23852 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23853 $tmp$$Register, mask_len, mbt, vlen_enc);
23854 %}
23855 ins_pipe( pipe_slow );
23856 %}
23857
23858 // --------------------------------- Compress/Expand Operations ---------------------------
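// CompressV packs the lanes selected by the mask towards the low end of the
// destination; ExpandV is the inverse scatter. Targets without AVX512VL fall
// back to an AVX2 sequence that needs GPR and XMM temporaries to compute a
// permutation, hence the larger operand list on the first pattern.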
23859 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23860 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23861 match(Set dst (CompressV src mask));
23862 match(Set dst (ExpandV src mask));
23863 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
23864 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23865 ins_encode %{
23866 int opcode = this->ideal_Opcode();
23867 int vlen_enc = vector_length_encoding(this);
23868 BasicType bt = Matcher::vector_element_basic_type(this);
23869 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23870 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23871 %}
23872 ins_pipe( pipe_slow );
23873 %}
23874
23875 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23876 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23877 match(Set dst (CompressV src mask));
23878 match(Set dst (ExpandV src mask));
23879 format %{ "vector_compress_expand $dst, $src, $mask" %}
23880 ins_encode %{
23881 int opcode = this->ideal_Opcode();
23882 int vector_len = vector_length_encoding(this);
23883 BasicType bt = Matcher::vector_element_basic_type(this);
23884 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23885 %}
23886 ins_pipe( pipe_slow );
23887 %}
23888
23889 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23890 match(Set dst (CompressM mask));
23891 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23892 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23893 ins_encode %{
23894 assert(this->in(1)->bottom_type()->isa_vectmask(), "expected vector mask");
23895 int mask_len = Matcher::vector_length(this);
23896 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23897 %}
23898 ins_pipe( pipe_slow );
23899 %}
23900
23901 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23902
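// ReverseV reverses the bit order within each lane. With GFNI a single
// affine transform against the constant 0x8040201008040201 performs the bit
// reversal; otherwise a slower multi-instruction fallback is used.
// ReverseBytesV only swaps bytes and needs temporaries solely for the
// 64-byte case without AVX512BW.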
23903 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23904 predicate(!VM_Version::supports_gfni());
23905 match(Set dst (ReverseV src));
23906 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23907 format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23908 ins_encode %{
23909 int vec_enc = vector_length_encoding(this);
23910 BasicType bt = Matcher::vector_element_basic_type(this);
23911 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23912 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23913 %}
23914 ins_pipe( pipe_slow );
23915 %}
23916
23917 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23918 predicate(VM_Version::supports_gfni());
23919 match(Set dst (ReverseV src));
23920 effect(TEMP dst, TEMP xtmp);
23921 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
23922 ins_encode %{
23923 int vec_enc = vector_length_encoding(this);
23924 BasicType bt = Matcher::vector_element_basic_type(this);
23925 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23926 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23927 $xtmp$$XMMRegister);
23928 %}
23929 ins_pipe( pipe_slow );
23930 %}
23931
23932 instruct vreverse_byte_reg(vec dst, vec src) %{
23933 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23934 match(Set dst (ReverseBytesV src));
23935 effect(TEMP dst);
23936 format %{ "vector_reverse_byte $dst, $src" %}
23937 ins_encode %{
23938 int vec_enc = vector_length_encoding(this);
23939 BasicType bt = Matcher::vector_element_basic_type(this);
23940 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23941 %}
23942 ins_pipe( pipe_slow );
23943 %}
23944
23945 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23946 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23947 match(Set dst (ReverseBytesV src));
23948 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23949 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23950 ins_encode %{
23951 int vec_enc = vector_length_encoding(this);
23952 BasicType bt = Matcher::vector_element_basic_type(this);
23953 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23954 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23955 %}
23956 ins_pipe( pipe_slow );
23957 %}
23958
23959 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23960
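// CountLeadingZerosV lowerings are selected by element type and available
// ISA extensions (AVX512CD supplies vplzcnt* for int/long; subword types and
// pre-AVX512 targets need helper temporaries). The masked EVEX form copies
// $src into $dst first so that lanes with a clear mask bit keep their
// original value (merge semantics).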
23961 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23962 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23963 Matcher::vector_length_in_bytes(n->in(1))));
23964 match(Set dst (CountLeadingZerosV src));
23965 format %{ "vector_count_leading_zeros $dst, $src" %}
23966 ins_encode %{
23967 int vlen_enc = vector_length_encoding(this, $src);
23968 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23969 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23970 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23971 %}
23972 ins_pipe( pipe_slow );
23973 %}
23974
23975 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23976 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23977 Matcher::vector_length_in_bytes(n->in(1))));
23978 match(Set dst (CountLeadingZerosV src mask));
23979 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
23980 ins_encode %{
23981 int vlen_enc = vector_length_encoding(this, $src);
23982 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23983 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23984 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
23985 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
23986 %}
23987 ins_pipe( pipe_slow );
23988 %}
23989
23990 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
23991 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23992 VM_Version::supports_avx512cd() &&
23993 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23994 match(Set dst (CountLeadingZerosV src));
23995 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
23996 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
23997 ins_encode %{
23998 int vlen_enc = vector_length_encoding(this, $src);
23999 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24000 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24001 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24002 %}
24003 ins_pipe( pipe_slow );
24004 %}
24005
24006 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24007 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24008 match(Set dst (CountLeadingZerosV src));
24009 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24010 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24011 ins_encode %{
24012 int vlen_enc = vector_length_encoding(this, $src);
24013 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24014 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24015 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24016 $rtmp$$Register, true, vlen_enc);
24017 %}
24018 ins_pipe( pipe_slow );
24019 %}
24020
24021 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24022 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24023 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24024 match(Set dst (CountLeadingZerosV src));
24025 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24026 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24027 ins_encode %{
24028 int vlen_enc = vector_length_encoding(this, $src);
24029 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24030 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24031 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24032 %}
24033 ins_pipe( pipe_slow );
24034 %}
24035
24036 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24037 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24038 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24039 match(Set dst (CountLeadingZerosV src));
24040 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24041 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
24042 ins_encode %{
24043 int vlen_enc = vector_length_encoding(this, $src);
24044 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24045 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24046 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24047 %}
24048 ins_pipe( pipe_slow );
24049 %}
24050
24051 // ---------------------------------- Vector Masked Operations ------------------------------------
24052
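// The patterns below fuse a lane-wise operation with a governing opmask
// register. All of them funnel into MacroAssembler::evmasked_op, which picks
// the EVEX-encoded instruction from the ideal opcode and element type; the
// 'true' argument requests merge-masking, so lanes whose mask bit is clear
// keep the previous destination value.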
24053 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24054 match(Set dst (AddVB (Binary dst src2) mask));
24055 match(Set dst (AddVS (Binary dst src2) mask));
24056 match(Set dst (AddVI (Binary dst src2) mask));
24057 match(Set dst (AddVL (Binary dst src2) mask));
24058 match(Set dst (AddVF (Binary dst src2) mask));
24059 match(Set dst (AddVD (Binary dst src2) mask));
24060 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24061 ins_encode %{
24062 int vlen_enc = vector_length_encoding(this);
24063 BasicType bt = Matcher::vector_element_basic_type(this);
24064 int opc = this->ideal_Opcode();
24065 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24066 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24067 %}
24068 ins_pipe( pipe_slow );
24069 %}
24070
24071 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24072 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24073 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24074 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24075 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24076 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24077 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24078 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24079 ins_encode %{
24080 int vlen_enc = vector_length_encoding(this);
24081 BasicType bt = Matcher::vector_element_basic_type(this);
24082 int opc = this->ideal_Opcode();
24083 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24084 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24085 %}
24086 ins_pipe( pipe_slow );
24087 %}
24088
24089 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24090 match(Set dst (XorV (Binary dst src2) mask));
24091 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24092 ins_encode %{
24093 int vlen_enc = vector_length_encoding(this);
24094 BasicType bt = Matcher::vector_element_basic_type(this);
24095 int opc = this->ideal_Opcode();
24096 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24097 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24098 %}
24099 ins_pipe( pipe_slow );
24100 %}
24101
24102 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24103 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24104 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24105 ins_encode %{
24106 int vlen_enc = vector_length_encoding(this);
24107 BasicType bt = Matcher::vector_element_basic_type(this);
24108 int opc = this->ideal_Opcode();
24109 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24110 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24111 %}
24112 ins_pipe( pipe_slow );
24113 %}
24114
24115 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24116 match(Set dst (OrV (Binary dst src2) mask));
24117 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24118 ins_encode %{
24119 int vlen_enc = vector_length_encoding(this);
24120 BasicType bt = Matcher::vector_element_basic_type(this);
24121 int opc = this->ideal_Opcode();
24122 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24123 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24124 %}
24125 ins_pipe( pipe_slow );
24126 %}
24127
24128 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24129 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24130 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24131 ins_encode %{
24132 int vlen_enc = vector_length_encoding(this);
24133 BasicType bt = Matcher::vector_element_basic_type(this);
24134 int opc = this->ideal_Opcode();
24135 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24136 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24137 %}
24138 ins_pipe( pipe_slow );
24139 %}
24140
24141 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24142 match(Set dst (AndV (Binary dst src2) mask));
24143 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24144 ins_encode %{
24145 int vlen_enc = vector_length_encoding(this);
24146 BasicType bt = Matcher::vector_element_basic_type(this);
24147 int opc = this->ideal_Opcode();
24148 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24149 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24150 %}
24151 ins_pipe( pipe_slow );
24152 %}
24153
24154 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24155 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24156 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24157 ins_encode %{
24158 int vlen_enc = vector_length_encoding(this);
24159 BasicType bt = Matcher::vector_element_basic_type(this);
24160 int opc = this->ideal_Opcode();
24161 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24162 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24163 %}
24164 ins_pipe( pipe_slow );
24165 %}
24166
24167 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24168 match(Set dst (SubVB (Binary dst src2) mask));
24169 match(Set dst (SubVS (Binary dst src2) mask));
24170 match(Set dst (SubVI (Binary dst src2) mask));
24171 match(Set dst (SubVL (Binary dst src2) mask));
24172 match(Set dst (SubVF (Binary dst src2) mask));
24173 match(Set dst (SubVD (Binary dst src2) mask));
24174 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24175 ins_encode %{
24176 int vlen_enc = vector_length_encoding(this);
24177 BasicType bt = Matcher::vector_element_basic_type(this);
24178 int opc = this->ideal_Opcode();
24179 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24180 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24181 %}
24182 ins_pipe( pipe_slow );
24183 %}
24184
24185 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24186 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24187 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24188 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24189 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24190 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24191 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24192 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24193 ins_encode %{
24194 int vlen_enc = vector_length_encoding(this);
24195 BasicType bt = Matcher::vector_element_basic_type(this);
24196 int opc = this->ideal_Opcode();
24197 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24198 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24199 %}
24200 ins_pipe( pipe_slow );
24201 %}
24202
24203 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24204 match(Set dst (MulVS (Binary dst src2) mask));
24205 match(Set dst (MulVI (Binary dst src2) mask));
24206 match(Set dst (MulVL (Binary dst src2) mask));
24207 match(Set dst (MulVF (Binary dst src2) mask));
24208 match(Set dst (MulVD (Binary dst src2) mask));
24209 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24210 ins_encode %{
24211 int vlen_enc = vector_length_encoding(this);
24212 BasicType bt = Matcher::vector_element_basic_type(this);
24213 int opc = this->ideal_Opcode();
24214 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24215 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24216 %}
24217 ins_pipe( pipe_slow );
24218 %}
24219
24220 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24221 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24222 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24223 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24224 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24225 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24226 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24227 ins_encode %{
24228 int vlen_enc = vector_length_encoding(this);
24229 BasicType bt = Matcher::vector_element_basic_type(this);
24230 int opc = this->ideal_Opcode();
24231 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24232 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24233 %}
24234 ins_pipe( pipe_slow );
24235 %}
24236
24237 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24238 match(Set dst (SqrtVF dst mask));
24239 match(Set dst (SqrtVD dst mask));
24240 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24241 ins_encode %{
24242 int vlen_enc = vector_length_encoding(this);
24243 BasicType bt = Matcher::vector_element_basic_type(this);
24244 int opc = this->ideal_Opcode();
24245 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24246 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24247 %}
24248 ins_pipe( pipe_slow );
24249 %}
24250
24251 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24252 match(Set dst (DivVF (Binary dst src2) mask));
24253 match(Set dst (DivVD (Binary dst src2) mask));
24254 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24255 ins_encode %{
24256 int vlen_enc = vector_length_encoding(this);
24257 BasicType bt = Matcher::vector_element_basic_type(this);
24258 int opc = this->ideal_Opcode();
24259 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24260 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24261 %}
24262 ins_pipe( pipe_slow );
24263 %}
24264
24265 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24266 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24267 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24268 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24269 ins_encode %{
24270 int vlen_enc = vector_length_encoding(this);
24271 BasicType bt = Matcher::vector_element_basic_type(this);
24272 int opc = this->ideal_Opcode();
24273 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24274 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24275 %}
24276 ins_pipe( pipe_slow );
24277 %}
24278
24279
24280 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24281 match(Set dst (RotateLeftV (Binary dst shift) mask));
24282 match(Set dst (RotateRightV (Binary dst shift) mask));
24283 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24284 ins_encode %{
24285 int vlen_enc = vector_length_encoding(this);
24286 BasicType bt = Matcher::vector_element_basic_type(this);
24287 int opc = this->ideal_Opcode();
24288 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24289 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24290 %}
24291 ins_pipe( pipe_slow );
24292 %}
24293
24294 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24295 match(Set dst (RotateLeftV (Binary dst src2) mask));
24296 match(Set dst (RotateRightV (Binary dst src2) mask));
24297 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24298 ins_encode %{
24299 int vlen_enc = vector_length_encoding(this);
24300 BasicType bt = Matcher::vector_element_basic_type(this);
24301 int opc = this->ideal_Opcode();
24302 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24303 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24304 %}
24305 ins_pipe( pipe_slow );
24306 %}
24307
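// Masked shifts come in three shapes: an immediate count, a uniform count
// held in a vector register, and a variable per-lane count. is_var_shift()
// separates the last two, since they encode to different instructions
// (e.g. vpsllw vs. vpsllvw).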
24308 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24309 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24310 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24311 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24312 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24313 ins_encode %{
24314 int vlen_enc = vector_length_encoding(this);
24315 BasicType bt = Matcher::vector_element_basic_type(this);
24316 int opc = this->ideal_Opcode();
24317 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24318 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24319 %}
24320 ins_pipe( pipe_slow );
24321 %}
24322
24323 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24324 predicate(!n->as_ShiftV()->is_var_shift());
24325 match(Set dst (LShiftVS (Binary dst src2) mask));
24326 match(Set dst (LShiftVI (Binary dst src2) mask));
24327 match(Set dst (LShiftVL (Binary dst src2) mask));
24328 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24329 ins_encode %{
24330 int vlen_enc = vector_length_encoding(this);
24331 BasicType bt = Matcher::vector_element_basic_type(this);
24332 int opc = this->ideal_Opcode();
24333 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24334 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24335 %}
24336 ins_pipe( pipe_slow );
24337 %}
24338
24339 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24340 predicate(n->as_ShiftV()->is_var_shift());
24341 match(Set dst (LShiftVS (Binary dst src2) mask));
24342 match(Set dst (LShiftVI (Binary dst src2) mask));
24343 match(Set dst (LShiftVL (Binary dst src2) mask));
24344 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24345 ins_encode %{
24346 int vlen_enc = vector_length_encoding(this);
24347 BasicType bt = Matcher::vector_element_basic_type(this);
24348 int opc = this->ideal_Opcode();
24349 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24350 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24351 %}
24352 ins_pipe( pipe_slow );
24353 %}
24354
24355 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24356 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24357 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24358 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24359 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24360 ins_encode %{
24361 int vlen_enc = vector_length_encoding(this);
24362 BasicType bt = Matcher::vector_element_basic_type(this);
24363 int opc = this->ideal_Opcode();
24364 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24366 %}
24367 ins_pipe( pipe_slow );
24368 %}
24369
24370 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24371 predicate(!n->as_ShiftV()->is_var_shift());
24372 match(Set dst (RShiftVS (Binary dst src2) mask));
24373 match(Set dst (RShiftVI (Binary dst src2) mask));
24374 match(Set dst (RShiftVL (Binary dst src2) mask));
24375 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24376 ins_encode %{
24377 int vlen_enc = vector_length_encoding(this);
24378 BasicType bt = Matcher::vector_element_basic_type(this);
24379 int opc = this->ideal_Opcode();
24380 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24381 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24382 %}
24383 ins_pipe( pipe_slow );
24384 %}
24385
24386 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24387 predicate(n->as_ShiftV()->is_var_shift());
24388 match(Set dst (RShiftVS (Binary dst src2) mask));
24389 match(Set dst (RShiftVI (Binary dst src2) mask));
24390 match(Set dst (RShiftVL (Binary dst src2) mask));
24391 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24392 ins_encode %{
24393 int vlen_enc = vector_length_encoding(this);
24394 BasicType bt = Matcher::vector_element_basic_type(this);
24395 int opc = this->ideal_Opcode();
24396 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24397 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24398 %}
24399 ins_pipe( pipe_slow );
24400 %}
24401
24402 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24403 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24404 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24405 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24406 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24407 ins_encode %{
24408 int vlen_enc = vector_length_encoding(this);
24409 BasicType bt = Matcher::vector_element_basic_type(this);
24410 int opc = this->ideal_Opcode();
24411 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24412 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24413 %}
24414 ins_pipe( pipe_slow );
24415 %}
24416
24417 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24418 predicate(!n->as_ShiftV()->is_var_shift());
24419 match(Set dst (URShiftVS (Binary dst src2) mask));
24420 match(Set dst (URShiftVI (Binary dst src2) mask));
24421 match(Set dst (URShiftVL (Binary dst src2) mask));
24422 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24423 ins_encode %{
24424 int vlen_enc = vector_length_encoding(this);
24425 BasicType bt = Matcher::vector_element_basic_type(this);
24426 int opc = this->ideal_Opcode();
24427 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24429 %}
24430 ins_pipe( pipe_slow );
24431 %}
24432
24433 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24434 predicate(n->as_ShiftV()->is_var_shift());
24435 match(Set dst (URShiftVS (Binary dst src2) mask));
24436 match(Set dst (URShiftVI (Binary dst src2) mask));
24437 match(Set dst (URShiftVL (Binary dst src2) mask));
24438 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24439 ins_encode %{
24440 int vlen_enc = vector_length_encoding(this);
24441 BasicType bt = Matcher::vector_element_basic_type(this);
24442 int opc = this->ideal_Opcode();
24443 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24444 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24445 %}
24446 ins_pipe( pipe_slow );
24447 %}
24448
24449 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24450 match(Set dst (MaxV (Binary dst src2) mask));
24451 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24452 ins_encode %{
24453 int vlen_enc = vector_length_encoding(this);
24454 BasicType bt = Matcher::vector_element_basic_type(this);
24455 int opc = this->ideal_Opcode();
24456 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24457 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24458 %}
24459 ins_pipe( pipe_slow );
24460 %}
24461
24462 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24463 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24464 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24465 ins_encode %{
24466 int vlen_enc = vector_length_encoding(this);
24467 BasicType bt = Matcher::vector_element_basic_type(this);
24468 int opc = this->ideal_Opcode();
24469 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24470 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24471 %}
24472 ins_pipe( pipe_slow );
24473 %}
24474
24475 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24476 match(Set dst (MinV (Binary dst src2) mask));
24477 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24478 ins_encode %{
24479 int vlen_enc = vector_length_encoding(this);
24480 BasicType bt = Matcher::vector_element_basic_type(this);
24481 int opc = this->ideal_Opcode();
24482 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24483 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24484 %}
24485 ins_pipe( pipe_slow );
24486 %}
24487
24488 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24489 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24490 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24491 ins_encode %{
24492 int vlen_enc = vector_length_encoding(this);
24493 BasicType bt = Matcher::vector_element_basic_type(this);
24494 int opc = this->ideal_Opcode();
24495 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24496 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24497 %}
24498 ins_pipe( pipe_slow );
24499 %}
24500
24501 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24502 match(Set dst (VectorRearrange (Binary dst src2) mask));
24503 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24504 ins_encode %{
24505 int vlen_enc = vector_length_encoding(this);
24506 BasicType bt = Matcher::vector_element_basic_type(this);
24507 int opc = this->ideal_Opcode();
24508 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24509 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24510 %}
24511 ins_pipe( pipe_slow );
24512 %}
24513
24514 instruct vabs_masked(vec dst, kReg mask) %{
24515 match(Set dst (AbsVB dst mask));
24516 match(Set dst (AbsVS dst mask));
24517 match(Set dst (AbsVI dst mask));
24518 match(Set dst (AbsVL dst mask));
24519 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24520 ins_encode %{
24521 int vlen_enc = vector_length_encoding(this);
24522 BasicType bt = Matcher::vector_element_basic_type(this);
24523 int opc = this->ideal_Opcode();
24524 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24525 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24526 %}
24527 ins_pipe( pipe_slow );
24528 %}
24529
24530 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24531 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24532 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24533 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24534 ins_encode %{
24535 assert(UseFMA, "Needs FMA instruction support.");
24536 int vlen_enc = vector_length_encoding(this);
24537 BasicType bt = Matcher::vector_element_basic_type(this);
24538 int opc = this->ideal_Opcode();
24539 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24540 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24541 %}
24542 ins_pipe( pipe_slow );
24543 %}
24544
24545 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24546 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24547 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24548 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24549 ins_encode %{
24550 assert(UseFMA, "Needs FMA instruction support.");
24551 int vlen_enc = vector_length_encoding(this);
24552 BasicType bt = Matcher::vector_element_basic_type(this);
24553 int opc = this->ideal_Opcode();
24554 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24555 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24556 %}
24557 ins_pipe( pipe_slow );
24558 %}
24559
24560 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24561 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24562 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24563 ins_encode %{
24564 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24565 int vlen_enc = vector_length_encoding(this, $src1);
24566 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24567
24568 // Comparison is dispatched on the element type of src1.
24569 switch (src1_elem_bt) {
24570 case T_BYTE: {
24571 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24572 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24573 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24574 break;
24575 }
24576 case T_SHORT: {
24577 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24578 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24579 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24580 break;
24581 }
24582 case T_INT: {
24583 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24584 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24585 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24586 break;
24587 }
24588 case T_LONG: {
24589 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24590 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24591 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24592 break;
24593 }
24594 case T_FLOAT: {
24595 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24596 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24597 break;
24598 }
24599 case T_DOUBLE: {
24600 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24601 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24602 break;
24603 }
24604 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24605 }
24606 %}
24607 ins_pipe( pipe_slow );
24608 %}
24609
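// MaskAll broadcasts a scalar condition into every lane of an opmask
// register. The XorVMask-with-MaskAll(-1) patterns that follow recognize
// mask negation and lower it to knot.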
24610 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24611 predicate(Matcher::vector_length(n) <= 32);
24612 match(Set dst (MaskAll src));
24613 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24614 ins_encode %{
24615 int mask_len = Matcher::vector_length(this);
24616 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24617 %}
24618 ins_pipe( pipe_slow );
24619 %}
24620
24621 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24622 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24623 match(Set dst (XorVMask src (MaskAll cnt)));
24624 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24625 format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24626 ins_encode %{
24627 uint masklen = Matcher::vector_length(this);
24628 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24629 %}
24630 ins_pipe( pipe_slow );
24631 %}
24632
24633 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24634 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24635 (Matcher::vector_length(n) == 16) ||
24636 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24637 match(Set dst (XorVMask src (MaskAll cnt)));
24638 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24639 ins_encode %{
24640 uint masklen = Matcher::vector_length(this);
24641 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24642 %}
24643 ins_pipe( pipe_slow );
24644 %}
24645
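// VectorLongToMask converts a long bitset back into a mask. With native
// opmasks this is a single kmov; otherwise the bits are expanded into a
// vector-shaped boolean mask (lengths above 8 need an XMM temporary and
// clobber the flags).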
24646 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24647 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24648 match(Set dst (VectorLongToMask src));
24649 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24650 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24651 ins_encode %{
24652 int mask_len = Matcher::vector_length(this);
24653 int vec_enc = vector_length_encoding(mask_len);
24654 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24655 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24656 %}
24657 ins_pipe( pipe_slow );
24658 %}
24659
24660
24661 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24662 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24663 match(Set dst (VectorLongToMask src));
24664 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24665 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24666 ins_encode %{
24667 int mask_len = Matcher::vector_length(this);
24668 assert(mask_len <= 32, "invalid mask length");
24669 int vec_enc = vector_length_encoding(mask_len);
24670 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24671 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24672 %}
24673 ins_pipe( pipe_slow );
24674 %}
24675
24676 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24677 predicate(n->bottom_type()->isa_vectmask());
24678 match(Set dst (VectorLongToMask src));
24679 format %{ "long_to_mask_evex $dst, $src\t!" %}
24680 ins_encode %{
24681 __ kmov($dst$$KRegister, $src$$Register);
24682 %}
24683 ins_pipe( pipe_slow );
24684 %}
24685
24686 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24687 match(Set dst (AndVMask src1 src2));
24688 match(Set dst (OrVMask src1 src2));
24689 match(Set dst (XorVMask src1 src2));
24690 effect(TEMP kscratch);
24691 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24692 ins_encode %{
24693 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24694 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24695 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24696 uint masklen = Matcher::vector_length(this);
24697 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24698 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24699 %}
24700 ins_pipe( pipe_slow );
24701 %}
24702
24703 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24704 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24705 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24706 ins_encode %{
24707 int vlen_enc = vector_length_encoding(this);
24708 BasicType bt = Matcher::vector_element_basic_type(this);
24709 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24710 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24711 %}
24712 ins_pipe( pipe_slow );
24713 %}
24714
24715 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24716 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24717 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24718 ins_encode %{
24719 int vlen_enc = vector_length_encoding(this);
24720 BasicType bt = Matcher::vector_element_basic_type(this);
24721 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24722 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24723 %}
24724 ins_pipe( pipe_slow );
24725 %}
24726
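// CastVV nodes exist only to carry type information; all three register
// classes encode to nothing and cost nothing.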
24727 instruct castMM(kReg dst)
24728 %{
24729 match(Set dst (CastVV dst));
24730
24731 size(0);
24732 format %{ "# castVV of $dst" %}
24733 ins_encode(/* empty encoding */);
24734 ins_cost(0);
24735 ins_pipe(empty);
24736 %}
24737
24738 instruct castVV(vec dst)
24739 %{
24740 match(Set dst (CastVV dst));
24741
24742 size(0);
24743 format %{ "# castVV of $dst" %}
24744 ins_encode(/* empty encoding */);
24745 ins_cost(0);
24746 ins_pipe(empty);
24747 %}
24748
24749 instruct castVVLeg(legVec dst)
24750 %{
24751 match(Set dst (CastVV dst));
24752
24753 size(0);
24754 format %{ "# castVV of $dst" %}
24755 ins_encode(/* empty encoding */);
24756 ins_cost(0);
24757 ins_pipe(empty);
24758 %}
24759
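// IsInfiniteF/D test the value class with vfpclass; immediate 0x18 selects
// the +infinity (bit 3) and -infinity (bit 4) classes.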
24760 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24761 %{
24762 match(Set dst (IsInfiniteF src));
24763 effect(TEMP ktmp, KILL cr);
24764 format %{ "float_class_check $dst, $src" %}
24765 ins_encode %{
24766 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24767 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24768 %}
24769 ins_pipe(pipe_slow);
24770 %}
24771
24772 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24773 %{
24774 match(Set dst (IsInfiniteD src));
24775 effect(TEMP ktmp, KILL cr);
24776 format %{ "double_class_check $dst, $src" %}
24777 ins_encode %{
24778 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24779 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24780 %}
24781 ins_pipe(pipe_slow);
24782 %}
24783
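// Saturating vector add/sub. Subword (byte/short) element types map directly
// onto the packed saturating instructions. No such instructions exist for
// int/long, so the EVEX and AVX variants below synthesize saturation from
// compares and temporaries, with separate signed and unsigned flavors.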
24784 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24785 %{
24786 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24787 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24788 match(Set dst (SaturatingAddV src1 src2));
24789 match(Set dst (SaturatingSubV src1 src2));
24790 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24791 ins_encode %{
24792 int vlen_enc = vector_length_encoding(this);
24793 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24794 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24795 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24796 %}
24797 ins_pipe(pipe_slow);
24798 %}
24799
24800 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24801 %{
24802 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24803 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24804 match(Set dst (SaturatingAddV src1 src2));
24805 match(Set dst (SaturatingSubV src1 src2));
24806 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24807 ins_encode %{
24808 int vlen_enc = vector_length_encoding(this);
24809 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24810 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24811 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24812 %}
24813 ins_pipe(pipe_slow);
24814 %}
24815
24816 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24817 %{
24818 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24819 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24820 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24821 match(Set dst (SaturatingAddV src1 src2));
24822 match(Set dst (SaturatingSubV src1 src2));
24823 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24824 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24825 ins_encode %{
24826 int vlen_enc = vector_length_encoding(this);
24827 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24828 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24829 $src1$$XMMRegister, $src2$$XMMRegister,
24830 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24831 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24832 %}
24833 ins_pipe(pipe_slow);
24834 %}
24835
24836 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24837 %{
24838 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24839 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24840 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24841 match(Set dst (SaturatingAddV src1 src2));
24842 match(Set dst (SaturatingSubV src1 src2));
24843 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24844 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24845 ins_encode %{
24846 int vlen_enc = vector_length_encoding(this);
24847 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24848 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24849 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24850 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24851 %}
24852 ins_pipe(pipe_slow);
24853 %}
24854
24855 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24856 %{
24857 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24858 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24859 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24860 match(Set dst (SaturatingAddV src1 src2));
24861 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24862 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24863 ins_encode %{
24864 int vlen_enc = vector_length_encoding(this);
24865 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24866 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24867 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24868 %}
24869 ins_pipe(pipe_slow);
24870 %}
24871
24872 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24873 %{
24874 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24875 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24876 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24877 match(Set dst (SaturatingAddV src1 src2));
24878 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24879 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24880 ins_encode %{
24881 int vlen_enc = vector_length_encoding(this);
24882 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24883 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24884 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24885 %}
24886 ins_pipe(pipe_slow);
24887 %}
24888
24889 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24890 %{
24891 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24892 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24893 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24894 match(Set dst (SaturatingSubV src1 src2));
24895 effect(TEMP ktmp);
24896 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24897 ins_encode %{
24898 int vlen_enc = vector_length_encoding(this);
24899 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24900 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24901 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24902 %}
24903 ins_pipe(pipe_slow);
24904 %}
24905
24906 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24907 %{
24908 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24909 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24910 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24911 match(Set dst (SaturatingSubV src1 src2));
24912 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24913 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24914 ins_encode %{
24915 int vlen_enc = vector_length_encoding(this);
24916 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24917 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24918 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24919 %}
24920 ins_pipe(pipe_slow);
24921 %}
24922
24923 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24924 %{
24925 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24926 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24927 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24928 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24929 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24930 ins_encode %{
24931 int vlen_enc = vector_length_encoding(this);
24932 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24933 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24934 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24935 %}
24936 ins_pipe(pipe_slow);
24937 %}
24938
24939 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24940 %{
24941 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24942 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24943 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24944 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24945 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24946 ins_encode %{
24947 int vlen_enc = vector_length_encoding(this);
24948 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24949 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24950 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24951 %}
24952 ins_pipe(pipe_slow);
24953 %}
24954
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

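// SelectFromTwoVector: each element of $index picks a lane out of the logical
// concatenation of $src1 and $src2, i.e. a two-table permute.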
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

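// Scalar FP16 operations. Half-precision values are carried in 16-bit shorts;
// the two reinterpret rules below move the raw bits between a GPR and an XMM
// register via vmovw (AVX512-FP16), while the conv* rules fuse a reinterpret
// with an actual float <-> half conversion.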
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

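// FP16 min/max must follow Java semantics: a NaN input produces NaN, and -0.0
// orders below +0.0. On AVX10.2 a single minmax instruction provides this
// behavior; the fallback rule below emulates it with mask and vector
// temporaries.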
instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

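// Packed FP16 operations. The memory variants match a short-vector load
// reinterpreted as fp16 ((VectorReinterpret (LoadVector ...))) so the memory
// operand can be folded straight into the EVEX-encoded instruction.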
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

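// Packed FP16 min/max, with the same Java NaN and signed-zero semantics as the
// scalar rules above: an AVX10.2 form and a pre-AVX10.2 fallback.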
instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization, the procedure should
// // reside in the architecture dependent peephole file, the method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that when invoked returns a new
// // node defined in peepreplace, and the rule numbers of the nodes appearing
// // in the corresponding peepmatch; the procedure returns true if successful,
// // else returns false
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole?)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in the same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexibility in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of these nodes inside are passed into the function below
//   peepmatch ( incI_rReg movI );
//   // the method that takes the responsibility of transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda which, when invoked,
//   // creates this node is passed into the function above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions are not matched by the matcher but are used by the peepholes
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
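    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte (SIB base 101 with mod 00 selects disp32 instead), so move them to
    // the index position when they show up as src1.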
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
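    // An index-only address (no base) always requires a 4-byte displacement,
    // so for a shift by 1 prefer the shorter base+index form (src + src).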
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial in
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
// beneficial for processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
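//
// For example (illustrative only), the int add rule rewrites
//   movl edx, eax
//   addl edx, ecx
// into the single instruction
//   leal edx, [eax + ecx]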

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
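//
// For example (illustrative only), in
//   andl  eax, ebx
//   testl eax, eax
//   je    L
// the testl is removable because andl already set ZF from the same result.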

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}


//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.