//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
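//
// For example, the first definition below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// reads: RAX is save-on-call both in compiled Java code and under the C
// calling convention, is spilled and filled as an int (LoadI/StoreI),
// has hardware encoding 0, and is backed by the VMReg rax->as_VMReg().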

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to the frequent use of
// uncommon traps. Now that the allocator is better, RSI and RDI are
// turned on again as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS,  SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS,  SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
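// For example, chunk0 below lists the save-on-call temporaries R10 and
// R11 first, so they are preferred for short-lived values, and lists
// RSP last, since the stack pointer is effectively never free for
// allocation.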

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8,  R8_H,
                   R9,  R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
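//
// Under this labeling, XMM0 alone names word (a) and holds a Float, the
// pair XMM0/XMM0b holds a Double, and XMM0 through XMM0p together span
// the full 512-bit register.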

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
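// K0 is not defined here: in EVEX encodings a mask-register specifier of
// zero means "no masking", so k0 cannot be used as an allocatable write
// mask.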
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// The flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7,
                           XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7,
                         XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for evex 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted (pre evex) 512bit vector registers, limited to XMM0-XMM15
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code that provides values, functions, and
// definitions needed in the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
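// Returns true when each bound of the CastLL node's long type is either
// unbounded (min_jlong/max_jlong) or fits in a signed 32-bit immediate,
// so the bounds can be encoded as imm32 operands in the matched instructions.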
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
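// R12 serves as the heap base register when compressed oops are in use,
// so it has to be kept out of the allocatable masks built below.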
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
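  // The extended GPRs r16-r31 are allocatable only when APX is available;
  // the !UseAPX paths below strip them from the masks again.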
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
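// vzeroupper encodes in 3 bytes (a 2-byte VEX prefix plus opcode 0x77),
// which is the size accounted for below.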
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
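                   // (assumed breakdown: 10-byte movq loading the inline
                   //  cache word into rax plus a 5-byte call rel32)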
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
// The address of the call instruction needs to be 4-byte aligned so that
// it does not span a cache line and can therefore be patched atomically.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
// The address of the call instruction needs to be 4-byte aligned so that
// it does not span a cache line and can therefore be patched atomically.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
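// Produce a three-way floating point compare result in dst: -1 if the first
// operand is below the second or the compare is unordered (NaN), 0 if equal,
// and 1 if above. Assumes the flags were set by a preceding ucomiss/ucomisd.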
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
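  // Both inputs are +/-0.0: OR the sign bits for min so that -0.0 wins,
  // AND them for max so that +0.0 wins, matching Java semantics.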
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
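  // verified_entry() performs the stack bang (when requested), saves rbp,
  // and allocates the frame in a single helper.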
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
1906 // NOTE: We set the table base offset here because users might be
1907 // emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return addr already pushed
  // and RBP
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return addr already pushed
  // and RBP
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
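// Coarse operand classes used by the spill-copy code below to select a
// move strategy for each (source, destination) combination.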
2014 enum RC {
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
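  // For the cases that need a temporary, borrow rax or xmm0 and preserve
  // the old value in the unused stack area just below rsp.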
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
2072 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2073 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2074 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2075 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 int src_offset = ra_->reg2offset(src_first);
2153 int dst_offset = ra_->reg2offset(dst_first);
2154 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2155 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2156 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2157 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2158 int stack_offset = ra_->reg2offset(dst_first);
2159 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2160 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2161 int stack_offset = ra_->reg2offset(src_first);
2162 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
      // mem -> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
2555 assert(0," foo ");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
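// Vector results are returned in XMM0; the high slot of the pair is chosen
// to match the width of the ideal vector register.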
2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2637 assert(EnableVectorSupport, "sanity");
2638 int lo = XMM0_num;
2639 int hi = XMM0b_num;
2640 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2641 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2642 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2643 return OptoRegPair(hi, lo);
2644 }
2645
2646 // Is this branch offset short enough that a short branch can be used?
2647 //
2648 // NOTE: If the platform does not provide any short branch variants, then
2649 // this method should return false for offset 0.
2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86, a branch displacement is calculated relative to the address
  // of the next instruction.
2654 offset -= br_size;
2655
  // The short version of jmpConUCF2 contains multiple branches,
  // making its reach slightly shorter.
2658 if (rule == jmpConUCF2_rule)
2659 return (-126 <= offset && offset <= 125);
2660 return (-128 <= offset && offset <= 127);
2661 }
2662
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2668 bool Matcher::can_be_java_arg(int reg)
2669 {
2670 return
2671 reg == RDI_num || reg == RDI_H_num ||
2672 reg == RSI_num || reg == RSI_H_num ||
2673 reg == RDX_num || reg == RDX_H_num ||
2674 reg == RCX_num || reg == RCX_H_num ||
2675 reg == R8_num || reg == R8_H_num ||
2676 reg == R9_num || reg == R9_H_num ||
2677 reg == R12_num || reg == R12_H_num ||
2678 reg == XMM0_num || reg == XMM0b_num ||
2679 reg == XMM1_num || reg == XMM1b_num ||
2680 reg == XMM2_num || reg == XMM2b_num ||
2681 reg == XMM3_num || reg == XMM3b_num ||
2682 reg == XMM4_num || reg == XMM4b_num ||
2683 reg == XMM5_num || reg == XMM5b_num ||
2684 reg == XMM6_num || reg == XMM6b_num ||
2685 reg == XMM7_num || reg == XMM7b_num;
2686 }
2687
2688 bool Matcher::is_spillable_arg(int reg)
2689 {
2690 return can_be_java_arg(reg);
2691 }
2692
2693 uint Matcher::int_pressure_limit()
2694 {
2695 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2696 }
2697
2698 uint Matcher::float_pressure_limit()
2699 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
2702 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2703 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2704 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2705 }
2706
2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is
  // constant is faster than the hardware DIV instruction (it uses
  // MulHiL).
2711 return false;
2712 }
2713
2714 // Register for DIVI projection of divmodI
2715 const RegMask& Matcher::divI_proj_mask() {
2716 return INT_RAX_REG_mask();
2717 }
2718
2719 // Register for MODI projection of divmodI
2720 const RegMask& Matcher::modI_proj_mask() {
2721 return INT_RDX_REG_mask();
2722 }
2723
2724 // Register for DIVL projection of divmodL
2725 const RegMask& Matcher::divL_proj_mask() {
2726 return LONG_RAX_REG_mask();
2727 }
2728
2729 // Register for MODL projection of divmodL
2730 const RegMask& Matcher::modL_proj_mask() {
2731 return LONG_RDX_REG_mask();
2732 }
2733
2734 %}
2735
2736 source_hpp %{
2737 // Header information of the source block.
2738 // Method declarations/definitions which are used outside
2739 // the ad-scope can conveniently be defined here.
2740 //
2741 // To keep related declarations/definitions/uses close together,
2742 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2743
2744 #include "runtime/vm_version.hpp"
2745
2746 class NativeJump;
2747
2748 class CallStubImpl {
2749
2750 //--------------------------------------------------------------
2751 //---< Used for optimization in Compile::shorten_branches >---
2752 //--------------------------------------------------------------
2753
2754 public:
2755 // Size of call trampoline stub.
2756 static uint size_call_trampoline() {
2757 return 0; // no call trampolines on this platform
2758 }
2759
2760 // number of relocations needed by a call trampoline stub
2761 static uint reloc_call_trampoline() {
2762 return 0; // no call trampolines on this platform
2763 }
2764 };
2765
2766 class HandlerImpl {
2767
2768 public:
2769
2770 static int emit_exception_handler(C2_MacroAssembler *masm);
2771 static int emit_deopt_handler(C2_MacroAssembler* masm);
2772
2773 static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
2777 // Note that this value is also credited (in output.cpp) to
2778 // the size of the code section.
2779 return NativeJump::instruction_size;
2780 }
2781
2782 static uint size_deopt_handler() {
2783 // three 5 byte instructions plus one move for unreachable address.
2784 return 15+3;
2785 }
2786 };
2787
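// Map a vector payload size in bytes to its AVX length encoding. Sub-XMM
// payloads (4 or 8 bytes) still use the 128-bit encoding, e.g.
// vector_length_encoding(32) == Assembler::AVX_256bit.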
2788 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2789 switch(bytes) {
2790 case 4: // fall-through
2791 case 8: // fall-through
2792 case 16: return Assembler::AVX_128bit;
2793 case 32: return Assembler::AVX_256bit;
2794 case 64: return Assembler::AVX_512bit;
2795
2796 default: {
2797 ShouldNotReachHere();
2798 return Assembler::AVX_NoVec;
2799 }
2800 }
2801 }
2802
2803 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2804 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2805 }
2806
2807 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2808 uint def_idx = use->operand_index(opnd);
2809 Node* def = use->in(def_idx);
2810 return vector_length_encoding(def);
2811 }
2812
2813 static inline bool is_vector_popcount_predicate(BasicType bt) {
2814 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2815 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2816 }
2817
2818 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2819 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2820 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2821 }
2822
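// Platform-dependent node flag bits, extending Node::NodeFlags. For example,
// the matcher analysis below tags affected nodes with Flag_intel_jcc_erratum,
// and MachNode::compute_padding() tests
// (flags() & Node::PD::Flag_intel_jcc_erratum) to decide whether a node
// needs erratum padding.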
2823 class Node::PD {
2824 public:
2825 enum NodeFlags {
2826 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2827 Flag_sets_carry_flag = Node::_last_flag << 2,
2828 Flag_sets_parity_flag = Node::_last_flag << 3,
2829 Flag_sets_zero_flag = Node::_last_flag << 4,
2830 Flag_sets_overflow_flag = Node::_last_flag << 5,
2831 Flag_sets_sign_flag = Node::_last_flag << 6,
2832 Flag_clears_carry_flag = Node::_last_flag << 7,
2833 Flag_clears_parity_flag = Node::_last_flag << 8,
2834 Flag_clears_zero_flag = Node::_last_flag << 9,
2835 Flag_clears_overflow_flag = Node::_last_flag << 10,
2836 Flag_clears_sign_flag = Node::_last_flag << 11,
2837 _last_flag = Flag_clears_sign_flag
2838 };
2839 };
2840
2841 %} // end source_hpp
2842
2843 source %{
2844
2845 #include "opto/addnode.hpp"
2846 #include "c2_intelJccErratum_x86.hpp"
2847
2848 void PhaseOutput::pd_perform_mach_node_analysis() {
2849 if (VM_Version::has_intel_jcc_erratum()) {
2850 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2851 _buf_sizes._code += extra_padding;
2852 }
2853 }
2854
2855 int MachNode::pd_alignment_required() const {
2856 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2857 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2858 return IntelJccErratum::largest_jcc_size() + 1;
2859 } else {
2860 return 1;
2861 }
2862 }
2863
2864 int MachNode::compute_padding(int current_offset) const {
2865 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2866 Compile* C = Compile::current();
2867 PhaseOutput* output = C->output();
2868 Block* block = output->block();
2869 int index = output->index();
2870 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2871 } else {
2872 return 0;
2873 }
2874 }
2875
2876 // Emit exception handler code.
2877 // Stuff framesize into a register and call a VM stub routine.
2878 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {
2879
2880 // Note that the code buffer's insts_mark is always relative to insts.
2881 // That's why we must use the macroassembler to generate a handler.
2882 address base = __ start_a_stub(size_exception_handler());
2883 if (base == nullptr) {
2884 ciEnv::current()->record_failure("CodeCache is full");
2885 return 0; // CodeBuffer::expand failed
2886 }
2887 int offset = __ offset();
2888 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
2889 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
2890 __ end_a_stub();
2891 return offset;
2892 }
2893
2894 // Emit deopt handler code.
2895 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2896
2897 // Note that the code buffer's insts_mark is always relative to insts.
2898 // That's why we must use the macroassembler to generate a handler.
2899 address base = __ start_a_stub(size_deopt_handler());
2900 if (base == nullptr) {
2901 ciEnv::current()->record_failure("CodeCache is full");
2902 return 0; // CodeBuffer::expand failed
2903 }
2904 int offset = __ offset();
2905
2906 address the_pc = (address) __ pc();
2907 Label next;
  // Push "the_pc" on the stack without destroying any registers,
  // as they may all be live.
2910
2911 // push address of "next"
2912 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
2913 __ bind(next);
2914 // adjust it so it matches "the_pc"
2915 __ subptr(Address(rsp, 0), __ offset() - offset);
2916
2917 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2918 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2919 __ end_a_stub();
2920 return offset;
2921 }
2922
2923 static Assembler::Width widthForType(BasicType bt) {
2924 if (bt == T_BYTE) {
2925 return Assembler::B;
2926 } else if (bt == T_SHORT) {
2927 return Assembler::W;
2928 } else if (bt == T_INT) {
2929 return Assembler::D;
2930 } else {
2931 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2932 return Assembler::Q;
2933 }
2934 }
2935
2936 //=============================================================================
2937
2938 // Float masks come from different places depending on platform.
2939 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2940 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2941 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2942 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2943 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2944 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2945 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2946 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2947 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2948 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2949 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2950 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2951 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2952 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2953 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2954 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2955 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2956 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2957 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2958
2959 //=============================================================================
2960 bool Matcher::match_rule_supported(int opcode) {
2961 if (!has_match_rule(opcode)) {
2962 return false; // no match rule present
2963 }
2964 switch (opcode) {
2965 case Op_AbsVL:
2966 case Op_StoreVectorScatter:
2967 if (UseAVX < 3) {
2968 return false;
2969 }
2970 break;
2971 case Op_PopCountI:
2972 case Op_PopCountL:
2973 if (!UsePopCountInstruction) {
2974 return false;
2975 }
2976 break;
2977 case Op_PopCountVI:
2978 if (UseAVX < 2) {
2979 return false;
2980 }
2981 break;
2982 case Op_CompressV:
2983 case Op_ExpandV:
2984 case Op_PopCountVL:
2985 if (UseAVX < 2) {
2986 return false;
2987 }
2988 break;
2989 case Op_MulVI:
2990 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
2991 return false;
2992 }
2993 break;
2994 case Op_MulVL:
2995 if (UseSSE < 4) { // only with SSE4_1 or AVX
2996 return false;
2997 }
2998 break;
2999 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3001 return false;
3002 }
3003 break;
3004 case Op_AbsVB:
3005 case Op_AbsVS:
3006 case Op_AbsVI:
3007 case Op_AddReductionVI:
3008 case Op_AndReductionV:
3009 case Op_OrReductionV:
3010 case Op_XorReductionV:
3011 if (UseSSE < 3) { // requires at least SSSE3
3012 return false;
3013 }
3014 break;
3015 case Op_MaxHF:
3016 case Op_MinHF:
3017 if (!VM_Version::supports_avx512vlbw()) {
3018 return false;
3019 } // fallthrough
3020 case Op_AddHF:
3021 case Op_DivHF:
3022 case Op_FmaHF:
3023 case Op_MulHF:
3024 case Op_ReinterpretS2HF:
3025 case Op_ReinterpretHF2S:
3026 case Op_SubHF:
3027 case Op_SqrtHF:
3028 if (!VM_Version::supports_avx512_fp16()) {
3029 return false;
3030 }
3031 break;
3032 case Op_VectorLoadShuffle:
3033 case Op_VectorRearrange:
3034 case Op_MulReductionVI:
3035 if (UseSSE < 4) { // requires at least SSE4
3036 return false;
3037 }
3038 break;
3039 case Op_IsInfiniteF:
3040 case Op_IsInfiniteD:
3041 if (!VM_Version::supports_avx512dq()) {
3042 return false;
3043 }
3044 break;
3045 case Op_SqrtVD:
3046 case Op_SqrtVF:
3047 case Op_VectorMaskCmp:
3048 case Op_VectorCastB2X:
3049 case Op_VectorCastS2X:
3050 case Op_VectorCastI2X:
3051 case Op_VectorCastL2X:
3052 case Op_VectorCastF2X:
3053 case Op_VectorCastD2X:
3054 case Op_VectorUCastB2X:
3055 case Op_VectorUCastS2X:
3056 case Op_VectorUCastI2X:
3057 case Op_VectorMaskCast:
3058 if (UseAVX < 1) { // enabled for AVX only
3059 return false;
3060 }
3061 break;
3062 case Op_PopulateIndex:
3063 if (UseAVX < 2) {
3064 return false;
3065 }
3066 break;
3067 case Op_RoundVF:
3068 if (UseAVX < 2) { // enabled for AVX2 only
3069 return false;
3070 }
3071 break;
3072 case Op_RoundVD:
3073 if (UseAVX < 3) {
3074 return false; // enabled for AVX3 only
3075 }
3076 break;
3077 case Op_CompareAndSwapL:
3078 case Op_CompareAndSwapP:
3079 break;
3080 case Op_StrIndexOf:
3081 if (!UseSSE42Intrinsics) {
3082 return false;
3083 }
3084 break;
3085 case Op_StrIndexOfChar:
3086 if (!UseSSE42Intrinsics) {
3087 return false;
3088 }
3089 break;
3090 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3092 return false;
3093 }
3094 break;
3095 case Op_MulVB:
3096 case Op_LShiftVB:
3097 case Op_RShiftVB:
3098 case Op_URShiftVB:
3099 case Op_VectorInsert:
3100 case Op_VectorLoadMask:
3101 case Op_VectorStoreMask:
3102 case Op_VectorBlend:
3103 if (UseSSE < 4) {
3104 return false;
3105 }
3106 break;
3107 case Op_MaxD:
3108 case Op_MaxF:
3109 case Op_MinD:
3110 case Op_MinF:
3111 if (UseAVX < 1) { // enabled for AVX only
3112 return false;
3113 }
3114 break;
3115 case Op_CacheWB:
3116 case Op_CacheWBPreSync:
3117 case Op_CacheWBPostSync:
3118 if (!VM_Version::supports_data_cache_line_flush()) {
3119 return false;
3120 }
3121 break;
3122 case Op_ExtractB:
3123 case Op_ExtractL:
3124 case Op_ExtractI:
3125 case Op_RoundDoubleMode:
3126 if (UseSSE < 4) {
3127 return false;
3128 }
3129 break;
3130 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3132 return false; // 128bit vroundpd is not available
3133 }
3134 break;
3135 case Op_LoadVectorGather:
3136 case Op_LoadVectorGatherMasked:
3137 if (UseAVX < 2) {
3138 return false;
3139 }
3140 break;
3141 case Op_FmaF:
3142 case Op_FmaD:
3143 case Op_FmaVD:
3144 case Op_FmaVF:
3145 if (!UseFMA) {
3146 return false;
3147 }
3148 break;
3149 case Op_MacroLogicV:
3150 if (UseAVX < 3 || !UseVectorMacroLogic) {
3151 return false;
3152 }
3153 break;
3154
3155 case Op_VectorCmpMasked:
3156 case Op_VectorMaskGen:
3157 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3158 return false;
3159 }
3160 break;
3161 case Op_VectorMaskFirstTrue:
3162 case Op_VectorMaskLastTrue:
3163 case Op_VectorMaskTrueCount:
3164 case Op_VectorMaskToLong:
3165 if (UseAVX < 1) {
3166 return false;
3167 }
3168 break;
3169 case Op_RoundF:
3170 case Op_RoundD:
3171 break;
3172 case Op_CopySignD:
3173 case Op_CopySignF:
3174 if (UseAVX < 3) {
3175 return false;
3176 }
3177 if (!VM_Version::supports_avx512vl()) {
3178 return false;
3179 }
3180 break;
3181 case Op_CompressBits:
3182 case Op_ExpandBits:
3183 if (!VM_Version::supports_bmi2()) {
3184 return false;
3185 }
3186 break;
3187 case Op_CompressM:
3188 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3189 return false;
3190 }
3191 break;
3192 case Op_ConvF2HF:
3193 case Op_ConvHF2F:
3194 if (!VM_Version::supports_float16()) {
3195 return false;
3196 }
3197 break;
3198 case Op_VectorCastF2HF:
3199 case Op_VectorCastHF2F:
3200 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3201 return false;
3202 }
3203 break;
3204 }
3205 return true; // Match rules are supported by default.
3206 }
3207
3208 //------------------------------------------------------------------------
3209
3210 static inline bool is_pop_count_instr_target(BasicType bt) {
3211 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3212 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3213 }
3214
3215 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3216 return match_rule_supported_vector(opcode, vlen, bt);
3217 }
3218
3219 // Identify extra cases that we might want to provide match rules for vector nodes and
3220 // other intrinsics guarded with vector length (vlen) and element type (bt).
3221 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3222 if (!match_rule_supported(opcode)) {
3223 return false;
3224 }
3225 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3226 // * SSE2 supports 128bit vectors for all types;
3227 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3228 // * AVX2 supports 256bit vectors for all types;
3229 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3230 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3231 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3232 // And MaxVectorSize is taken into account as well.
3233 if (!vector_size_supported(bt, vlen)) {
3234 return false;
3235 }
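  // For instance, (bt == T_BYTE, vlen == 64) implies a 512-bit vector, which
  // the check above only accepts when AVX512BW is available and
  // MaxVectorSize >= 64.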
3236 // Special cases which require vector length follow:
3237 // * implementation limitations
3238 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3239 // * 128bit vroundpd instruction is present only in AVX1
3240 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3241 switch (opcode) {
3242 case Op_MaxVHF:
3243 case Op_MinVHF:
3244 if (!VM_Version::supports_avx512bw()) {
3245 return false;
      } // fallthrough
3247 case Op_AddVHF:
3248 case Op_DivVHF:
3249 case Op_FmaVHF:
3250 case Op_MulVHF:
3251 case Op_SubVHF:
3252 case Op_SqrtVHF:
3253 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3254 return false;
3255 }
3256 if (!VM_Version::supports_avx512_fp16()) {
3257 return false;
3258 }
3259 break;
3260 case Op_AbsVF:
3261 case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3263 return false; // 512bit vandps and vxorps are not available
3264 }
3265 break;
3266 case Op_AbsVD:
3267 case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3269 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3270 }
3271 break;
3272 case Op_RotateRightV:
3273 case Op_RotateLeftV:
3274 if (bt != T_INT && bt != T_LONG) {
3275 return false;
3276 } // fallthrough
3277 case Op_MacroLogicV:
3278 if (!VM_Version::supports_evex() ||
3279 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3280 return false;
3281 }
3282 break;
3283 case Op_ClearArray:
3284 case Op_VectorMaskGen:
3285 case Op_VectorCmpMasked:
3286 if (!VM_Version::supports_avx512bw()) {
3287 return false;
3288 }
3289 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3290 return false;
3291 }
3292 break;
3293 case Op_LoadVectorMasked:
3294 case Op_StoreVectorMasked:
3295 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3296 return false;
3297 }
3298 break;
3299 case Op_UMinV:
3300 case Op_UMaxV:
3301 if (UseAVX == 0) {
3302 return false;
3303 }
3304 break;
3305 case Op_MaxV:
3306 case Op_MinV:
3307 if (UseSSE < 4 && is_integral_type(bt)) {
3308 return false;
3309 }
3310 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3311 // Float/Double intrinsics are enabled for AVX family currently.
3312 if (UseAVX == 0) {
3313 return false;
3314 }
3315 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3316 return false;
3317 }
3318 }
3319 break;
3320 case Op_CallLeafVector:
3321 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3322 return false;
3323 }
3324 break;
3325 case Op_AddReductionVI:
3326 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3327 return false;
3328 }
3329 // fallthrough
3330 case Op_AndReductionV:
3331 case Op_OrReductionV:
3332 case Op_XorReductionV:
3333 if (is_subword_type(bt) && (UseSSE < 4)) {
3334 return false;
3335 }
3336 break;
3337 case Op_MinReductionV:
3338 case Op_MaxReductionV:
3339 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3340 return false;
3341 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3342 return false;
3343 }
3344 // Float/Double intrinsics enabled for AVX family.
3345 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3346 return false;
3347 }
3348 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3349 return false;
3350 }
3351 break;
3352 case Op_VectorTest:
3353 if (UseSSE < 4) {
3354 return false; // Implementation limitation
3355 } else if (size_in_bits < 32) {
3356 return false; // Implementation limitation
3357 }
3358 break;
3359 case Op_VectorLoadShuffle:
3360 case Op_VectorRearrange:
      if (vlen == 2) {
3362 return false; // Implementation limitation due to how shuffle is loaded
3363 } else if (size_in_bits == 256 && UseAVX < 2) {
3364 return false; // Implementation limitation
3365 }
3366 break;
3367 case Op_VectorLoadMask:
3368 case Op_VectorMaskCast:
3369 if (size_in_bits == 256 && UseAVX < 2) {
3370 return false; // Implementation limitation
3371 }
3372 // fallthrough
3373 case Op_VectorStoreMask:
3374 if (vlen == 2) {
3375 return false; // Implementation limitation
3376 }
3377 break;
3378 case Op_PopulateIndex:
3379 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3380 return false;
3381 }
3382 break;
3383 case Op_VectorCastB2X:
3384 case Op_VectorCastS2X:
3385 case Op_VectorCastI2X:
3386 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3387 return false;
3388 }
3389 break;
3390 case Op_VectorCastL2X:
3391 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3392 return false;
3393 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3394 return false;
3395 }
3396 break;
3397 case Op_VectorCastF2X: {
3398 // As per JLS section 5.1.3 narrowing conversion to sub-word types
3399 // happen after intermediate conversion to integer and special handling
3400 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
3401 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3402 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3403 return false;
3404 }
3405 }
3406 // fallthrough
3407 case Op_VectorCastD2X:
3408 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3409 return false;
3410 }
3411 break;
3412 case Op_VectorCastF2HF:
3413 case Op_VectorCastHF2F:
3414 if (!VM_Version::supports_f16c() &&
3415 ((!VM_Version::supports_evex() ||
3416 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3417 return false;
3418 }
3419 break;
3420 case Op_RoundVD:
3421 if (!VM_Version::supports_avx512dq()) {
3422 return false;
3423 }
3424 break;
3425 case Op_MulReductionVI:
3426 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3427 return false;
3428 }
3429 break;
3430 case Op_LoadVectorGatherMasked:
3431 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3432 return false;
3433 }
3434 if (is_subword_type(bt) &&
3435 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3436 (size_in_bits < 64) ||
3437 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3438 return false;
3439 }
3440 break;
3441 case Op_StoreVectorScatterMasked:
3442 case Op_StoreVectorScatter:
3443 if (is_subword_type(bt)) {
3444 return false;
3445 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3446 return false;
3447 }
3448 // fallthrough
3449 case Op_LoadVectorGather:
3450 if (!is_subword_type(bt) && size_in_bits == 64) {
3451 return false;
3452 }
3453 if (is_subword_type(bt) && size_in_bits < 64) {
3454 return false;
3455 }
3456 break;
3457 case Op_SaturatingAddV:
3458 case Op_SaturatingSubV:
3459 if (UseAVX < 1) {
3460 return false; // Implementation limitation
3461 }
3462 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3463 return false;
3464 }
3465 break;
3466 case Op_SelectFromTwoVector:
3467 if (size_in_bits < 128) {
3468 return false;
3469 }
3470 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3471 return false;
3472 }
3473 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3474 return false;
3475 }
3476 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3477 return false;
3478 }
3479 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3480 return false;
3481 }
3482 break;
3483 case Op_MaskAll:
3484 if (!VM_Version::supports_evex()) {
3485 return false;
3486 }
3487 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3488 return false;
3489 }
3490 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3491 return false;
3492 }
3493 break;
3494 case Op_VectorMaskCmp:
3495 if (vlen < 2 || size_in_bits < 32) {
3496 return false;
3497 }
3498 break;
3499 case Op_CompressM:
3500 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3501 return false;
3502 }
3503 break;
3504 case Op_CompressV:
3505 case Op_ExpandV:
3506 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3507 return false;
3508 }
      if (size_in_bits < 128) {
3510 return false;
3511 }
3512 case Op_VectorLongToMask:
3513 if (UseAVX < 1) {
3514 return false;
3515 }
3516 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3517 return false;
3518 }
3519 break;
3520 case Op_SignumVD:
3521 case Op_SignumVF:
3522 if (UseAVX < 1) {
3523 return false;
3524 }
3525 break;
3526 case Op_PopCountVI:
3527 case Op_PopCountVL: {
3528 if (!is_pop_count_instr_target(bt) &&
3529 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3530 return false;
3531 }
3532 }
3533 break;
3534 case Op_ReverseV:
3535 case Op_ReverseBytesV:
3536 if (UseAVX < 2) {
3537 return false;
3538 }
3539 break;
3540 case Op_CountTrailingZerosV:
3541 case Op_CountLeadingZerosV:
3542 if (UseAVX < 2) {
3543 return false;
3544 }
3545 break;
3546 }
3547 return true; // Per default match rules are supported.
3548 }
3549
3550 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3551 // ADLC based match_rule_supported routine checks for the existence of pattern based
3552 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
3553 // of their non-masked counterpart with mask edge being the differentiator.
3554 // This routine does a strict check on the existence of masked operation patterns
3555 // by returning a default false value for all the other opcodes apart from the
3556 // ones whose masked instruction patterns are defined in this file.
3557 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3558 return false;
3559 }
3560
3561 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3562 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3563 return false;
3564 }
3565 switch(opcode) {
3566 // Unary masked operations
3567 case Op_AbsVB:
3568 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      } // fallthrough
3572 case Op_AbsVI:
3573 case Op_AbsVL:
3574 return true;
3575
3576 // Ternary masked operations
3577 case Op_FmaVF:
3578 case Op_FmaVD:
3579 return true;
3580
3581 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3583 return false;
3584 }
3585 return true;
3586
3587 // Binary masked operations
3588 case Op_AddVB:
3589 case Op_AddVS:
3590 case Op_SubVB:
3591 case Op_SubVS:
3592 case Op_MulVS:
3593 case Op_LShiftVS:
3594 case Op_RShiftVS:
3595 case Op_URShiftVS:
3596 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3597 if (!VM_Version::supports_avx512bw()) {
3598 return false; // Implementation limitation
3599 }
3600 return true;
3601
3602 case Op_MulVL:
3603 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3604 if (!VM_Version::supports_avx512dq()) {
3605 return false; // Implementation limitation
3606 }
3607 return true;
3608
3609 case Op_AndV:
3610 case Op_OrV:
3611 case Op_XorV:
3612 case Op_RotateRightV:
3613 case Op_RotateLeftV:
3614 if (bt != T_INT && bt != T_LONG) {
3615 return false; // Implementation limitation
3616 }
3617 return true;
3618
3619 case Op_VectorLoadMask:
3620 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3621 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3622 return false;
3623 }
3624 return true;
3625
3626 case Op_AddVI:
3627 case Op_AddVL:
3628 case Op_AddVF:
3629 case Op_AddVD:
3630 case Op_SubVI:
3631 case Op_SubVL:
3632 case Op_SubVF:
3633 case Op_SubVD:
3634 case Op_MulVI:
3635 case Op_MulVF:
3636 case Op_MulVD:
3637 case Op_DivVF:
3638 case Op_DivVD:
3639 case Op_SqrtVF:
3640 case Op_SqrtVD:
3641 case Op_LShiftVI:
3642 case Op_LShiftVL:
3643 case Op_RShiftVI:
3644 case Op_RShiftVL:
3645 case Op_URShiftVI:
3646 case Op_URShiftVL:
3647 case Op_LoadVectorMasked:
3648 case Op_StoreVectorMasked:
3649 case Op_LoadVectorGatherMasked:
3650 case Op_StoreVectorScatterMasked:
3651 return true;
3652
3653 case Op_UMinV:
3654 case Op_UMaxV:
3655 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3656 return false;
3657 } // fallthrough
3658 case Op_MaxV:
3659 case Op_MinV:
3660 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3661 return false; // Implementation limitation
3662 }
3663 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3664 return false; // Implementation limitation
3665 }
3666 return true;
3667 case Op_SaturatingAddV:
3668 case Op_SaturatingSubV:
3669 if (!is_subword_type(bt)) {
3670 return false;
3671 }
3672 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3673 return false; // Implementation limitation
3674 }
3675 return true;
3676
3677 case Op_VectorMaskCmp:
3678 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3679 return false; // Implementation limitation
3680 }
3681 return true;
3682
3683 case Op_VectorRearrange:
3684 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3685 return false; // Implementation limitation
3686 }
3687 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3688 return false; // Implementation limitation
3689 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3690 return false; // Implementation limitation
3691 }
3692 return true;
3693
3694 // Binary Logical operations
3695 case Op_AndVMask:
3696 case Op_OrVMask:
3697 case Op_XorVMask:
3698 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3699 return false; // Implementation limitation
3700 }
3701 return true;
3702
3703 case Op_PopCountVI:
3704 case Op_PopCountVL:
3705 if (!is_pop_count_instr_target(bt)) {
3706 return false;
3707 }
3708 return true;
3709
3710 case Op_MaskAll:
3711 return true;
3712
3713 case Op_CountLeadingZerosV:
3714 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3715 return true;
      } // fallthrough
3717 default:
3718 return false;
3719 }
3720 }
3721
3722 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3723 return false;
3724 }
3725
3726 // Return true if Vector::rearrange needs preparation of the shuffle argument
3727 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3728 switch (elem_bt) {
3729 case T_BYTE: return false;
3730 case T_SHORT: return !VM_Version::supports_avx512bw();
3731 case T_INT: return !VM_Version::supports_avx();
3732 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3733 default:
3734 ShouldNotReachHere();
3735 return false;
3736 }
3737 }
3738
3739 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3740 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3741 bool legacy = (generic_opnd->opcode() == LEGVEC);
3742 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3743 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3744 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3745 return new legVecZOper();
3746 }
3747 if (legacy) {
3748 switch (ideal_reg) {
3749 case Op_VecS: return new legVecSOper();
3750 case Op_VecD: return new legVecDOper();
3751 case Op_VecX: return new legVecXOper();
3752 case Op_VecY: return new legVecYOper();
3753 case Op_VecZ: return new legVecZOper();
3754 }
3755 } else {
3756 switch (ideal_reg) {
3757 case Op_VecS: return new vecSOper();
3758 case Op_VecD: return new vecDOper();
3759 case Op_VecX: return new vecXOper();
3760 case Op_VecY: return new vecYOper();
3761 case Op_VecZ: return new vecZOper();
3762 }
3763 }
3764 ShouldNotReachHere();
3765 return nullptr;
3766 }
3767
3768 bool Matcher::is_reg2reg_move(MachNode* m) {
3769 switch (m->rule()) {
3770 case MoveVec2Leg_rule:
3771 case MoveLeg2Vec_rule:
3772 case MoveF2VL_rule:
3773 case MoveF2LEG_rule:
3774 case MoveVL2F_rule:
3775 case MoveLEG2F_rule:
3776 case MoveD2VL_rule:
3777 case MoveD2LEG_rule:
3778 case MoveVL2D_rule:
3779 case MoveLEG2D_rule:
3780 return true;
3781 default:
3782 return false;
3783 }
3784 }
3785
3786 bool Matcher::is_generic_vector(MachOper* opnd) {
3787 switch (opnd->opcode()) {
3788 case VEC:
3789 case LEGVEC:
3790 return true;
3791 default:
3792 return false;
3793 }
3794 }
3795
3796 //------------------------------------------------------------------------
3797
3798 const RegMask* Matcher::predicate_reg_mask(void) {
3799 return &_VECTMASK_REG_mask;
3800 }
3801
3802 // Max vector size in bytes. 0 if not supported.
3803 int Matcher::vector_width_in_bytes(BasicType bt) {
3804 assert(is_java_primitive(bt), "only primitive type vectors");
3805 // SSE2 supports 128bit vectors for all types.
3806 // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
3808 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
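  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256 bits) and UseAVX == 3
  // gives 64 bytes; the checks below refine this per element type.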
3809 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3810 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3811 size = (UseAVX > 2) ? 64 : 32;
3812 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3813 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3814 // Use flag to limit vector size.
3815 size = MIN2(size,(int)MaxVectorSize);
3816 // Minimum 2 values in vector (or 4 for bytes).
3817 switch (bt) {
3818 case T_DOUBLE:
3819 case T_LONG:
3820 if (size < 16) return 0;
3821 break;
3822 case T_FLOAT:
3823 case T_INT:
3824 if (size < 8) return 0;
3825 break;
3826 case T_BOOLEAN:
3827 if (size < 4) return 0;
3828 break;
3829 case T_CHAR:
3830 if (size < 4) return 0;
3831 break;
3832 case T_BYTE:
3833 if (size < 4) return 0;
3834 break;
3835 case T_SHORT:
3836 if (size < 4) return 0;
3837 break;
3838 default:
3839 ShouldNotReachHere();
3840 }
3841 return size;
3842 }
3843
3844 // Limits on vector size (number of elements) loaded into vector.
3845 int Matcher::max_vector_size(const BasicType bt) {
3846 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3847 }
3848 int Matcher::min_vector_size(const BasicType bt) {
3849 int max_size = max_vector_size(bt);
3850 // Min size which can be loaded into vector is 4 bytes.
3851 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling SVML double64 vector routines.
3853 if (bt == T_DOUBLE) {
3854 size = 1;
3855 }
3856 return MIN2(size,max_size);
3857 }
3858
3859 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3860 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3861 // by default on Cascade Lake
3862 if (VM_Version::is_default_intel_cascade_lake()) {
3863 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3864 }
3865 return Matcher::max_vector_size(bt);
3866 }
3867
3868 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3869 return -1;
3870 }
3871
3872 // Vector ideal reg corresponding to specified size in bytes
3873 uint Matcher::vector_ideal_reg(int size) {
3874 assert(MaxVectorSize >= size, "");
3875 switch(size) {
3876 case 4: return Op_VecS;
3877 case 8: return Op_VecD;
3878 case 16: return Op_VecX;
3879 case 32: return Op_VecY;
3880 case 64: return Op_VecZ;
3881 }
3882 ShouldNotReachHere();
3883 return 0;
3884 }
3885
3886 // Check for shift by small constant as well
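// e.g. (AddP base (LShiftL (ConvI2L i) #3)) can be cloned into the address
// expression so that it folds into an x86 [base + i*8] addressing mode.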
3887 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3888 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3889 shift->in(2)->get_int() <= 3 &&
3890 // Are there other uses besides address expressions?
3891 !matcher->is_visited(shift)) {
3892 address_visited.set(shift->_idx); // Flag as address_visited
3893 mstack.push(shift->in(2), Matcher::Visit);
3894 Node *conv = shift->in(1);
3895 // Allow Matcher to match the rule which bypass
3896 // ConvI2L operation for an array index on LP64
3897 // if the index value is positive.
3898 if (conv->Opcode() == Op_ConvI2L &&
3899 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3900 // Are there other uses besides address expressions?
3901 !matcher->is_visited(conv)) {
3902 address_visited.set(conv->_idx); // Flag as address_visited
3903 mstack.push(conv->in(1), Matcher::Pre_Visit);
3904 } else {
3905 mstack.push(conv, Matcher::Pre_Visit);
3906 }
3907 return true;
3908 }
3909 return false;
3910 }
3911
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3918 //
3919 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3920 // This is a temporary solution until we make DAGs expressible in ADL.
3921 template<typename ConType>
3922 class FusedPatternMatcher {
3923 Node* _op1_node;
3924 Node* _mop_node;
3925 int _con_op;
3926
3927 static int match_next(Node* n, int next_op, int next_op_idx) {
3928 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3929 return -1;
3930 }
3931
3932 if (next_op_idx == -1) { // n is commutative, try rotations
3933 if (n->in(1)->Opcode() == next_op) {
3934 return 1;
3935 } else if (n->in(2)->Opcode() == next_op) {
3936 return 2;
3937 }
3938 } else {
3939 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3940 if (n->in(next_op_idx)->Opcode() == next_op) {
3941 return next_op_idx;
3942 }
3943 }
3944 return -1;
3945 }
3946
3947 public:
3948 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3949 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3950
3951 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
3952 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
3953 typename ConType::NativeType con_value) {
3954 if (_op1_node->Opcode() != op1) {
3955 return false;
3956 }
3957 if (_mop_node->outcnt() > 2) {
3958 return false;
3959 }
3960 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
3961 if (op1_op2_idx == -1) {
3962 return false;
3963 }
3964 // Memory operation must be the other edge
3965 int op1_mop_idx = (op1_op2_idx & 1) + 1;
3966
3967 // Check that the mop node is really what we want
3968 if (_op1_node->in(op1_mop_idx) == _mop_node) {
3969 Node* op2_node = _op1_node->in(op1_op2_idx);
3970 if (op2_node->outcnt() > 1) {
3971 return false;
3972 }
3973 assert(op2_node->Opcode() == op2, "Should be");
3974 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
3975 if (op2_con_idx == -1) {
3976 return false;
3977 }
3978 // Memory operation must be the other edge
3979 int op2_mop_idx = (op2_con_idx & 1) + 1;
3980 // Check that the memory operation is the same node
3981 if (op2_node->in(op2_mop_idx) == _mop_node) {
3982 // Now check the constant
3983 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
3984 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
3985 return true;
3986 }
3987 }
3988 }
3989 return false;
3990 }
3991 };
3992
3993 static bool is_bmi_pattern(Node* n, Node* m) {
3994 assert(UseBMI1Instructions, "sanity");
3995 if (n != nullptr && m != nullptr) {
3996 if (m->Opcode() == Op_LoadI) {
3997 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
3998 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
3999 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4000 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4001 } else if (m->Opcode() == Op_LoadL) {
4002 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4003 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4004 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4005 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4006 }
4007 }
4008 return false;
4009 }
4010
4011 // Should the matcher clone input 'm' of node 'n'?
4012 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4013 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4014 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4015 mstack.push(m, Visit);
4016 return true;
4017 }
4018 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4019 mstack.push(m, Visit); // m = ShiftCntV
4020 return true;
4021 }
4022 if (is_encode_and_store_pattern(n, m)) {
4023 mstack.push(m, Visit);
4024 return true;
4025 }
4026 return false;
4027 }
4028
4029 // Should the Matcher clone shifts on addressing modes, expecting them
4030 // to be subsumed into complex addressing expressions or compute them
4031 // into registers?
4032 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4033 Node *off = m->in(AddPNode::Offset);
4034 if (off->is_Con()) {
4035 address_visited.test_set(m->_idx); // Flag as address_visited
4036 Node *adr = m->in(AddPNode::Address);
4037
4038 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4039 // AtomicAdd is not an addressing expression.
4040 // Cheap to find it by looking for screwy base.
4041 if (adr->is_AddP() &&
4042 !adr->in(AddPNode::Base)->is_top() &&
4043 !adr->in(AddPNode::Offset)->is_Con() &&
4044 off->get_long() == (int) (off->get_long()) && // immL32
4045 // Are there other uses besides address expressions?
4046 !is_visited(adr)) {
4047 address_visited.set(adr->_idx); // Flag as address_visited
4048 Node *shift = adr->in(AddPNode::Offset);
4049 if (!clone_shift(shift, this, mstack, address_visited)) {
4050 mstack.push(shift, Pre_Visit);
4051 }
4052 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4053 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4054 } else {
4055 mstack.push(adr, Pre_Visit);
4056 }
4057
4058 // Clone X+offset as it also folds into most addressing expressions
4059 mstack.push(off, Visit);
4060 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4061 return true;
4062 } else if (clone_shift(off, this, mstack, address_visited)) {
4063 address_visited.test_set(m->_idx); // Flag as address_visited
4064 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4065 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4066 return true;
4067 }
4068 return false;
4069 }
4070
4071 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4072 switch (bt) {
4073 case BoolTest::eq:
4074 return Assembler::eq;
4075 case BoolTest::ne:
4076 return Assembler::neq;
4077 case BoolTest::le:
4078 case BoolTest::ule:
4079 return Assembler::le;
4080 case BoolTest::ge:
4081 case BoolTest::uge:
4082 return Assembler::nlt;
4083 case BoolTest::lt:
4084 case BoolTest::ult:
4085 return Assembler::lt;
4086 case BoolTest::gt:
4087 case BoolTest::ugt:
4088 return Assembler::nle;
4089 default : ShouldNotReachHere(); return Assembler::_false;
4090 }
4091 }
4092
4093 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4094 switch (bt) {
4095 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4096 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4097 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4098 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4099 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4100 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4101 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4102 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4103 }
4104 }
4105
4106 // Helper methods for MachSpillCopyNode::implementation().
4107 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4108 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4109 assert(ireg == Op_VecS || // 32bit vector
4110 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4111 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4112 "no non-adjacent vector moves" );
4113 if (masm) {
4114 switch (ireg) {
4115 case Op_VecS: // copy whole register
4116 case Op_VecD:
4117 case Op_VecX:
4118 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4119 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4120 } else {
4121 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4122 }
4123 break;
4124 case Op_VecY:
4125 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4126 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4127 } else {
4128 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4129 }
4130 break;
4131 case Op_VecZ:
4132 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4133 break;
4134 default:
4135 ShouldNotReachHere();
4136 }
4137 #ifndef PRODUCT
4138 } else {
4139 switch (ireg) {
4140 case Op_VecS:
4141 case Op_VecD:
4142 case Op_VecX:
4143 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4144 break;
4145 case Op_VecY:
4146 case Op_VecZ:
4147 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4148 break;
4149 default:
4150 ShouldNotReachHere();
4151 }
4152 #endif
4153 }
4154 }
4155
4156 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4157 int stack_offset, int reg, uint ireg, outputStream* st) {
4158 if (masm) {
4159 if (is_load) {
4160 switch (ireg) {
4161 case Op_VecS:
4162 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4163 break;
4164 case Op_VecD:
4165 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4166 break;
4167 case Op_VecX:
4168 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4169 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4170 } else {
4171 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4172 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4173 }
4174 break;
4175 case Op_VecY:
4176 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4177 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4178 } else {
4179 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4180 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4181 }
4182 break;
4183 case Op_VecZ:
4184 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4185 break;
4186 default:
4187 ShouldNotReachHere();
4188 }
4189 } else { // store
4190 switch (ireg) {
4191 case Op_VecS:
4192 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4193 break;
4194 case Op_VecD:
4195 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4196 break;
4197 case Op_VecX:
4198 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4199 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4202 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4203 }
4204 break;
4205 case Op_VecY:
4206 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4207 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4210 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4211 }
4212 break;
4213 case Op_VecZ:
4214 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4215 break;
4216 default:
4217 ShouldNotReachHere();
4218 }
4219 }
4220 #ifndef PRODUCT
4221 } else {
4222 if (is_load) {
4223 switch (ireg) {
4224 case Op_VecS:
4225 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4226 break;
4227 case Op_VecD:
4228 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4229 break;
4230 case Op_VecX:
4231 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4232 break;
4233 case Op_VecY:
4234 case Op_VecZ:
4235 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4236 break;
4237 default:
4238 ShouldNotReachHere();
4239 }
4240 } else { // store
4241 switch (ireg) {
4242 case Op_VecS:
4243 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4244 break;
4245 case Op_VecD:
4246 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4247 break;
4248 case Op_VecX:
4249 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4250 break;
4251 case Op_VecY:
4252 case Op_VecZ:
4253 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4254 break;
4255 default:
4256 ShouldNotReachHere();
4257 }
4258 }
4259 #endif
4260 }
4261 }
4262
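// Build the little-endian byte image of a 'len'-lane vector constant whose
// lanes all hold 'con'; e.g. vreplicate_imm(T_SHORT, (jshort)1, 4) yields the
// 8 bytes 01 00 01 00 01 00 01 00.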
4263 template <class T>
4264 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4265 int size = type2aelembytes(bt) * len;
4266 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4267 for (int i = 0; i < len; i++) {
4268 int offset = i * type2aelembytes(bt);
4269 switch (bt) {
4270 case T_BYTE: val->at(i) = con; break;
4271 case T_SHORT: {
4272 jshort c = con;
4273 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4274 break;
4275 }
4276 case T_INT: {
4277 jint c = con;
4278 memcpy(val->adr_at(offset), &c, sizeof(jint));
4279 break;
4280 }
4281 case T_LONG: {
4282 jlong c = con;
4283 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4284 break;
4285 }
4286 case T_FLOAT: {
4287 jfloat c = con;
4288 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4289 break;
4290 }
4291 case T_DOUBLE: {
4292 jdouble c = con;
4293 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4294 break;
4295 }
4296 default: assert(false, "%s", type2name(bt));
4297 }
4298 }
4299 return val;
4300 }
4301
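// Return a 64-bit pattern with each element's sign bit set, e.g.
// high_bit_set(T_SHORT) == 0x8000800080008000; it is typically broadcast into
// a vector to form sign-bit masks.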
4302 static inline jlong high_bit_set(BasicType bt) {
4303 switch (bt) {
4304 case T_BYTE: return 0x8080808080808080;
4305 case T_SHORT: return 0x8000800080008000;
4306 case T_INT: return 0x8000000080000000;
4307 case T_LONG: return 0x8000000000000000;
4308 default:
4309 ShouldNotReachHere();
4310 return 0;
4311 }
4312 }
4313
4314 #ifndef PRODUCT
4315 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4316 st->print("nop \t# %d bytes pad for loops and calls", _count);
4317 }
4318 #endif
4319
4320 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4321 __ nop(_count);
4322 }
4323
4324 uint MachNopNode::size(PhaseRegAlloc*) const {
4325 return _count;
4326 }
4327
4328 #ifndef PRODUCT
4329 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4330 st->print("# breakpoint");
4331 }
4332 #endif
4333
4334 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4335 __ int3();
4336 }
4337
4338 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4339 return MachNode::size(ra_);
4340 }
4341
4342 %}
4343
4344 //----------ENCODING BLOCK-----------------------------------------------------
4345 // This block specifies the encoding classes used by the compiler to
4346 // output byte streams. Encoding classes are parameterized macros
4347 // used by Machine Instruction Nodes in order to generate the bit
4348 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4352 // which returns its register number when queried. CONST_INTER causes
4353 // an operand to generate a function which returns the value of the
4354 // constant when queried. MEMORY_INTER causes an operand to generate
4355 // four functions which return the Base Register, the Index Register,
4356 // the Scale Value, and the Offset Value of the operand when queried.
4357 // COND_INTER causes an operand to generate six functions which return
4358 // the encoding code (ie - encoding bits for the instruction)
4359 // associated with each basic boolean condition for a conditional
4360 // instruction.
4361 //
4362 // Instructions specify two basic values for encoding. Again, a
4363 // function is available to check if the constant displacement is an
4364 // oop. They use the ins_encode keyword to specify their encoding
4365 // classes (which must be a sequence of enc_class names, and their
4366 // parameters, specified in the encoding block), and they use the
4367 // opcode keyword to specify, in order, their primary, secondary, and
4368 // tertiary opcode. Only the opcode sections which a particular
4369 // instruction needs for encoding need to be specified.
4370 encode %{
4371 enc_class cdql_enc(no_rax_rdx_RegI div)
4372 %{
4373 // Full implementation of Java idiv and irem; checks for
4374 // special case as described in JVM spec., p.243 & p.271.
4375 //
4376 // normal case special case
4377 //
4378 // input : rax: dividend min_int
4379 // reg: divisor -1
4380 //
4381 // output: rax: quotient (= rax idiv reg) min_int
4382 // rdx: remainder (= rax irem reg) 0
4383 //
  // Code sequence:
4385 //
4386 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4387 // 5: 75 07/08 jne e <normal>
4388 // 7: 33 d2 xor %edx,%edx
4389 // [div >= 8 -> offset + 1]
4390 // [REX_B]
4391 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4392 // c: 74 03/04 je 11 <done>
4393 // 000000000000000e <normal>:
4394 // e: 99 cltd
4395 // [div >= 8 -> offset + 1]
4396 // [REX_B]
4397 // f: f7 f9 idiv $div
4398 // 0000000000000011 <done>:
4399 Label normal;
4400 Label done;
4401
4402 // cmp $0x80000000,%eax
4403 __ cmpl(as_Register(RAX_enc), 0x80000000);
4404
4405 // jne e <normal>
4406 __ jccb(Assembler::notEqual, normal);
4407
4408 // xor %edx,%edx
4409 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4410
// cmp $0xffffffffffffffff,$div
4412 __ cmpl($div$$Register, -1);
4413
4414 // je 11 <done>
4415 __ jccb(Assembler::equal, done);
4416
4417 // <normal>
4418 // cltd
4419 __ bind(normal);
4420 __ cdql();
4421
4422 // idivl
4423 // <done>
4424 __ idivl($div$$Register);
4425 __ bind(done);
4426 %}
4427
4428 enc_class cdqq_enc(no_rax_rdx_RegL div)
4429 %{
4430 // Full implementation of Java ldiv and lrem; checks for
4431 // special case as described in JVM spec., p.243 & p.271.
4432 //
4433 // normal case special case
4434 //
4435 // input : rax: dividend min_long
4436 // reg: divisor -1
4437 //
4438 // output: rax: quotient (= rax idiv reg) min_long
4439 // rdx: remainder (= rax irem reg) 0
4440 //
// Code sequence:
4442 //
4443 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4444 // 7: 00 00 80
4445 // a: 48 39 d0 cmp %rdx,%rax
4446 // d: 75 08 jne 17 <normal>
4447 // f: 33 d2 xor %edx,%edx
4448 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4449 // 15: 74 05 je 1c <done>
4450 // 0000000000000017 <normal>:
4451 // 17: 48 99 cqto
4452 // 19: 48 f7 f9 idiv $div
4453 // 000000000000001c <done>:
4454 Label normal;
4455 Label done;
4456
4457 // mov $0x8000000000000000,%rdx
4458 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4459
4460 // cmp %rdx,%rax
4461 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4462
4463 // jne 17 <normal>
4464 __ jccb(Assembler::notEqual, normal);
4465
4466 // xor %edx,%edx
4467 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4468
4469 // cmp $0xffffffffffffffff,$div
4470 __ cmpq($div$$Register, -1);
4471
// je 1c <done>
4473 __ jccb(Assembler::equal, done);
4474
4475 // <normal>
4476 // cqto
4477 __ bind(normal);
4478 __ cdqq();
4479
// idivq
4481 // <done>
4482 __ idivq($div$$Register);
4483 __ bind(done);
4484 %}
4485
4486 enc_class clear_avx %{
4487 DEBUG_ONLY(int off0 = __ offset());
4488 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when current compiled code uses
// wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
4492 __ vzeroupper();
4493 }
4494 DEBUG_ONLY(int off1 = __ offset());
4495 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4496 %}
4497
4498 enc_class Java_To_Runtime(method meth) %{
4499 __ lea(r10, RuntimeAddress((address)$meth$$method));
4500 __ call(r10);
4501 __ post_call_nop();
4502 %}
4503
4504 enc_class Java_Static_Call(method meth)
4505 %{
4506 // JAVA STATIC CALL
4507 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4508 // determine who we intended to call.
4509 if (!_method) {
4510 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4511 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4512 // The NOP here is purely to ensure that eliding a call to
4513 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4514 __ addr_nop_5();
4515 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4516 } else {
4517 int method_index = resolved_method_index(masm);
4518 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4519 : static_call_Relocation::spec(method_index);
4520 address mark = __ pc();
4521 int call_offset = __ offset();
4522 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4523 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4524 // Calls of the same statically bound method can share
4525 // a stub to the interpreter.
4526 __ code()->shared_stub_to_interp_for(_method, call_offset);
4527 } else {
4528 // Emit stubs for static call.
4529 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4530 __ clear_inst_mark();
4531 if (stub == nullptr) {
4532 ciEnv::current()->record_failure("CodeCache is full");
4533 return;
4534 }
4535 }
4536 }
4537 __ post_call_nop();
4538 %}
4539
4540 enc_class Java_Dynamic_Call(method meth) %{
4541 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4542 __ post_call_nop();
4543 %}
4544
4545 enc_class call_epilog %{
4546 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find magic cookie on stack
4548 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4549 Label L;
4550 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4551 __ jccb(Assembler::equal, L);
4552 // Die if stack mismatch
4553 __ int3();
4554 __ bind(L);
4555 }
4556 %}
4557
4558 %}
4559
4560 //----------FRAME--------------------------------------------------------------
4561 // Definition of frame structure and management information.
4562 //
4563 // S T A C K L A Y O U T Allocators stack-slot number
4564 // | (to get allocators register number
4565 // G Owned by | | v add OptoReg::stack0())
4566 // r CALLER | |
4567 // o | +--------+ pad to even-align allocators stack-slot
4568 // w V | pad0 | numbers; owned by CALLER
4569 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4570 // h ^ | in | 5
4571 // | | args | 4 Holes in incoming args owned by SELF
4572 // | | | | 3
4573 // | | +--------+
4574 // V | | old out| Empty on Intel, window on Sparc
4575 // | old |preserve| Must be even aligned.
4576 // | SP-+--------+----> Matcher::_old_SP, even aligned
4577 // | | in | 3 area for Intel ret address
4578 // Owned by |preserve| Empty on Sparc.
4579 // SELF +--------+
4580 // | | pad2 | 2 pad to align old SP
4581 // | +--------+ 1
4582 // | | locks | 0
4583 // | +--------+----> OptoReg::stack0(), even aligned
4584 // | | pad1 | 11 pad to align new SP
4585 // | +--------+
4586 // | | | 10
4587 // | | spills | 9 spills
4588 // V | | 8 (pad0 slot for callee)
4589 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4590 // ^ | out | 7
4591 // | | args | 6 Holes in outgoing args owned by CALLEE
4592 // Owned by +--------+
4593 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4594 // | new |preserve| Must be even-aligned.
4595 // | SP-+--------+----> Matcher::_new_SP, even aligned
4596 // | | |
4597 //
4598 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4599 // known from SELF's arguments and the Java calling convention.
4600 // Region 6-7 is determined per call site.
4601 // Note 2: If the calling convention leaves holes in the incoming argument
4602 // area, those holes are owned by SELF. Holes in the outgoing area
4603 // are owned by the CALLEE. Holes should not be necessary in the
4604 // incoming area, as the Java calling convention is completely under
4605 // the control of the AD file. Doubles can be sorted and packed to
4606 // avoid holes. Holes in the outgoing arguments may be necessary for
4607 // varargs C calling conventions.
4608 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4609 // even aligned with pad0 as needed.
4610 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4611 // region 6-11 is even aligned; it may be padded out more so that
4612 // the region from SP to FP meets the minimum stack alignment.
4613 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4614 // alignment. Region 11, pad1, may be dynamically extended so that
4615 // SP meets the minimum alignment.
4616
4617 frame
4618 %{
// The registers named below define part of the calling convention
// between compiled code and the interpreter.
4621 inline_cache_reg(RAX); // Inline Cache Register
4622
4623 // Optional: name the operand used by cisc-spilling to access
4624 // [stack_pointer + offset]
4625 cisc_spilling_operand_name(indOffset32);
4626
4627 // Number of stack slots consumed by locking an object
4628 sync_stack_slots(2);
4629
4630 // Compiled code's Frame Pointer
4631 frame_pointer(RSP);
4632
// The interpreter stores its frame pointer in a register which is
// stored to the stack by I2C adapters.
// I2C adapters convert from interpreted Java to compiled Java.
4636 interpreter_frame_pointer(RBP);
4637
4638 // Stack alignment requirement
4639 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4640
4641 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4642 // for calls to C. Supports the var-args backing area for register parms.
4643 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4644
4645 // The after-PROLOG location of the return address. Location of
4646 // return address specifies a type (REG or STACK) and a number
// representing the register number (i.e., use a register name) or
4648 // stack slot.
4649 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
// Otherwise, it is above the locks and verification slot and the alignment word.
4651 return_addr(STACK - 2 +
4652 align_up((Compile::current()->in_preserve_stack_slots() +
4653 Compile::current()->fixed_slots()),
4654 stack_alignment_in_slots()));
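// For example (illustrative arithmetic with assumed values): with 4
// in-preserve slots, no fixed slots, and 16-byte (4-slot) stack alignment,
// this evaluates to STACK - 2 + align_up(4 + 0, 4) = STACK + 2.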
4655
4656 // Location of compiled Java return values. Same as C for now.
4657 return_value
4658 %{
4659 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4660 "only return normal values");
4661
4662 static const int lo[Op_RegL + 1] = {
4663 0,
4664 0,
4665 RAX_num, // Op_RegN
4666 RAX_num, // Op_RegI
4667 RAX_num, // Op_RegP
4668 XMM0_num, // Op_RegF
4669 XMM0_num, // Op_RegD
4670 RAX_num // Op_RegL
4671 };
4672 static const int hi[Op_RegL + 1] = {
4673 0,
4674 0,
4675 OptoReg::Bad, // Op_RegN
4676 OptoReg::Bad, // Op_RegI
4677 RAX_H_num, // Op_RegP
4678 OptoReg::Bad, // Op_RegF
4679 XMM0b_num, // Op_RegD
4680 RAX_H_num // Op_RegL
4681 };
4682 // Excluded flags and vector registers.
4683 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4684 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4685 %}
4686 %}
4687
4688 //----------ATTRIBUTES---------------------------------------------------------
4689 //----------Operand Attributes-------------------------------------------------
4690 op_attrib op_cost(0); // Required cost attribute
4691
4692 //----------Instruction Attributes---------------------------------------------
4693 ins_attrib ins_cost(100); // Required cost attribute
4694 ins_attrib ins_size(8); // Required size attribute (in bits)
4695 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4696 // a non-matching short branch variant
4697 // of some long branch?
4698 ins_attrib ins_alignment(1); // Required alignment attribute (must
4699 // be a power of 2) specifies the
4700 // alignment that some part of the
4701 // instruction (not necessarily the
4702 // start) requires. If > 1, a
4703 // compute_padding() function must be
4704 // provided for the instruction
4705
4706 // Whether this node is expanded during code emission into a sequence of
4707 // instructions and the first instruction can perform an implicit null check.
4708 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4709
4710 //----------OPERANDS-----------------------------------------------------------
4711 // Operand definitions must precede instruction definitions for correct parsing
4712 // in the ADLC because operands constitute user defined types which are used in
4713 // instruction definitions.
4714
4715 //----------Simple Operands----------------------------------------------------
4716 // Immediate Operands
4717 // Integer Immediate
4718 operand immI()
4719 %{
4720 match(ConI);
4721
4722 op_cost(10);
4723 format %{ %}
4724 interface(CONST_INTER);
4725 %}
4726
4727 // Constant for test vs zero
4728 operand immI_0()
4729 %{
4730 predicate(n->get_int() == 0);
4731 match(ConI);
4732
4733 op_cost(0);
4734 format %{ %}
4735 interface(CONST_INTER);
4736 %}
4737
4738 // Constant for increment
4739 operand immI_1()
4740 %{
4741 predicate(n->get_int() == 1);
4742 match(ConI);
4743
4744 op_cost(0);
4745 format %{ %}
4746 interface(CONST_INTER);
4747 %}
4748
4749 // Constant for decrement
4750 operand immI_M1()
4751 %{
4752 predicate(n->get_int() == -1);
4753 match(ConI);
4754
4755 op_cost(0);
4756 format %{ %}
4757 interface(CONST_INTER);
4758 %}
4759
4760 operand immI_2()
4761 %{
4762 predicate(n->get_int() == 2);
4763 match(ConI);
4764
4765 op_cost(0);
4766 format %{ %}
4767 interface(CONST_INTER);
4768 %}
4769
4770 operand immI_4()
4771 %{
4772 predicate(n->get_int() == 4);
4773 match(ConI);
4774
4775 op_cost(0);
4776 format %{ %}
4777 interface(CONST_INTER);
4778 %}
4779
4780 operand immI_8()
4781 %{
4782 predicate(n->get_int() == 8);
4783 match(ConI);
4784
4785 op_cost(0);
4786 format %{ %}
4787 interface(CONST_INTER);
4788 %}
4789
4790 // Valid scale values for addressing modes
4791 operand immI2()
4792 %{
4793 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4794 match(ConI);
4795
4796 format %{ %}
4797 interface(CONST_INTER);
4798 %}
4799
4800 operand immU7()
4801 %{
4802 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4803 match(ConI);
4804
4805 op_cost(5);
4806 format %{ %}
4807 interface(CONST_INTER);
4808 %}
4809
4810 operand immI8()
4811 %{
4812 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4813 match(ConI);
4814
4815 op_cost(5);
4816 format %{ %}
4817 interface(CONST_INTER);
4818 %}
4819
4820 operand immU8()
4821 %{
4822 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4823 match(ConI);
4824
4825 op_cost(5);
4826 format %{ %}
4827 interface(CONST_INTER);
4828 %}
4829
4830 operand immI16()
4831 %{
4832 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4833 match(ConI);
4834
4835 op_cost(10);
4836 format %{ %}
4837 interface(CONST_INTER);
4838 %}
4839
4840 // Int Immediate non-negative
4841 operand immU31()
4842 %{
4843 predicate(n->get_int() >= 0);
4844 match(ConI);
4845
4846 op_cost(0);
4847 format %{ %}
4848 interface(CONST_INTER);
4849 %}
4850
4851 // Pointer Immediate
4852 operand immP()
4853 %{
4854 match(ConP);
4855
4856 op_cost(10);
4857 format %{ %}
4858 interface(CONST_INTER);
4859 %}
4860
4861 // Null Pointer Immediate
4862 operand immP0()
4863 %{
4864 predicate(n->get_ptr() == 0);
4865 match(ConP);
4866
4867 op_cost(5);
4868 format %{ %}
4869 interface(CONST_INTER);
4870 %}
4871
4872 // Pointer Immediate
4873 operand immN() %{
4874 match(ConN);
4875
4876 op_cost(10);
4877 format %{ %}
4878 interface(CONST_INTER);
4879 %}
4880
4881 operand immNKlass() %{
4882 match(ConNKlass);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Null Pointer Immediate
4890 operand immN0() %{
4891 predicate(n->get_narrowcon() == 0);
4892 match(ConN);
4893
4894 op_cost(5);
4895 format %{ %}
4896 interface(CONST_INTER);
4897 %}
4898
4899 operand immP31()
4900 %{
4901 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4902 && (n->get_ptr() >> 31) == 0);
4903 match(ConP);
4904
4905 op_cost(5);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910
4911 // Long Immediate
4912 operand immL()
4913 %{
4914 match(ConL);
4915
4916 op_cost(20);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 // Long Immediate 8-bit
4922 operand immL8()
4923 %{
4924 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4925 match(ConL);
4926
4927 op_cost(5);
4928 format %{ %}
4929 interface(CONST_INTER);
4930 %}
4931
4932 // Long Immediate 32-bit unsigned
4933 operand immUL32()
4934 %{
4935 predicate(n->get_long() == (unsigned int) (n->get_long()));
4936 match(ConL);
4937
4938 op_cost(10);
4939 format %{ %}
4940 interface(CONST_INTER);
4941 %}
4942
4943 // Long Immediate 32-bit signed
4944 operand immL32()
4945 %{
4946 predicate(n->get_long() == (int) (n->get_long()));
4947 match(ConL);
4948
4949 op_cost(15);
4950 format %{ %}
4951 interface(CONST_INTER);
4952 %}
4953
4954 operand immL_Pow2()
4955 %{
4956 predicate(is_power_of_2((julong)n->get_long()));
4957 match(ConL);
4958
4959 op_cost(15);
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 operand immL_NotPow2()
4965 %{
4966 predicate(is_power_of_2((julong)~n->get_long()));
4967 match(ConL);
4968
4969 op_cost(15);
4970 format %{ %}
4971 interface(CONST_INTER);
4972 %}
4973
4974 // Long Immediate zero
4975 operand immL0()
4976 %{
4977 predicate(n->get_long() == 0L);
4978 match(ConL);
4979
4980 op_cost(10);
4981 format %{ %}
4982 interface(CONST_INTER);
4983 %}
4984
4985 // Constant for increment
4986 operand immL1()
4987 %{
4988 predicate(n->get_long() == 1);
4989 match(ConL);
4990
4991 format %{ %}
4992 interface(CONST_INTER);
4993 %}
4994
4995 // Constant for decrement
4996 operand immL_M1()
4997 %{
4998 predicate(n->get_long() == -1);
4999 match(ConL);
5000
5001 format %{ %}
5002 interface(CONST_INTER);
5003 %}
5004
5005 // Long Immediate: low 32-bit mask
5006 operand immL_32bits()
5007 %{
5008 predicate(n->get_long() == 0xFFFFFFFFL);
5009 match(ConL);
5010 op_cost(20);
5011
5012 format %{ %}
5013 interface(CONST_INTER);
5014 %}
5015
5016 // Int Immediate: 2^n-1, positive
5017 operand immI_Pow2M1()
5018 %{
5019 predicate((n->get_int() > 0)
5020 && is_power_of_2((juint)n->get_int() + 1));
5021 match(ConI);
5022
5023 op_cost(20);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Float Immediate zero
5029 operand immF0()
5030 %{
5031 predicate(jint_cast(n->getf()) == 0);
5032 match(ConF);
5033
5034 op_cost(5);
5035 format %{ %}
5036 interface(CONST_INTER);
5037 %}
5038
5039 // Float Immediate
5040 operand immF()
5041 %{
5042 match(ConF);
5043
5044 op_cost(15);
5045 format %{ %}
5046 interface(CONST_INTER);
5047 %}
5048
5049 // Half Float Immediate
5050 operand immH()
5051 %{
5052 match(ConH);
5053
5054 op_cost(15);
5055 format %{ %}
5056 interface(CONST_INTER);
5057 %}
5058
5059 // Double Immediate zero
5060 operand immD0()
5061 %{
5062 predicate(jlong_cast(n->getd()) == 0);
5063 match(ConD);
5064
5065 op_cost(5);
5066 format %{ %}
5067 interface(CONST_INTER);
5068 %}
5069
5070 // Double Immediate
5071 operand immD()
5072 %{
5073 match(ConD);
5074
5075 op_cost(15);
5076 format %{ %}
5077 interface(CONST_INTER);
5078 %}
5079
5080 // Immediates for special shifts (sign extend)
5081
// Constants for sign-extension shift counts
5083 operand immI_16()
5084 %{
5085 predicate(n->get_int() == 16);
5086 match(ConI);
5087
5088 format %{ %}
5089 interface(CONST_INTER);
5090 %}
5091
5092 operand immI_24()
5093 %{
5094 predicate(n->get_int() == 24);
5095 match(ConI);
5096
5097 format %{ %}
5098 interface(CONST_INTER);
5099 %}
5100
5101 // Constant for byte-wide masking
5102 operand immI_255()
5103 %{
5104 predicate(n->get_int() == 255);
5105 match(ConI);
5106
5107 format %{ %}
5108 interface(CONST_INTER);
5109 %}
5110
5111 // Constant for short-wide masking
5112 operand immI_65535()
5113 %{
5114 predicate(n->get_int() == 65535);
5115 match(ConI);
5116
5117 format %{ %}
5118 interface(CONST_INTER);
5119 %}
5120
5121 // Constant for byte-wide masking
5122 operand immL_255()
5123 %{
5124 predicate(n->get_long() == 255);
5125 match(ConL);
5126
5127 format %{ %}
5128 interface(CONST_INTER);
5129 %}
5130
5131 // Constant for short-wide masking
5132 operand immL_65535()
5133 %{
5134 predicate(n->get_long() == 65535);
5135 match(ConL);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 // AOT Runtime Constants Address
5142 operand immAOTRuntimeConstantsAddress()
5143 %{
5144 // Check if the address is in the range of AOT Runtime Constants
5145 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5146 match(ConP);
5147
5148 op_cost(0);
5149 format %{ %}
5150 interface(CONST_INTER);
5151 %}
5152
5153 operand kReg()
5154 %{
5155 constraint(ALLOC_IN_RC(vectmask_reg));
5156 match(RegVectMask);
5157 format %{%}
5158 interface(REG_INTER);
5159 %}
5160
5161 // Register Operands
5162 // Integer Register
5163 operand rRegI()
5164 %{
5165 constraint(ALLOC_IN_RC(int_reg));
5166 match(RegI);
5167
5168 match(rax_RegI);
5169 match(rbx_RegI);
5170 match(rcx_RegI);
5171 match(rdx_RegI);
5172 match(rdi_RegI);
5173
5174 format %{ %}
5175 interface(REG_INTER);
5176 %}
5177
5178 // Special Registers
5179 operand rax_RegI()
5180 %{
5181 constraint(ALLOC_IN_RC(int_rax_reg));
5182 match(RegI);
5183 match(rRegI);
5184
5185 format %{ "RAX" %}
5186 interface(REG_INTER);
5187 %}
5188
5189 // Special Registers
5190 operand rbx_RegI()
5191 %{
5192 constraint(ALLOC_IN_RC(int_rbx_reg));
5193 match(RegI);
5194 match(rRegI);
5195
5196 format %{ "RBX" %}
5197 interface(REG_INTER);
5198 %}
5199
5200 operand rcx_RegI()
5201 %{
5202 constraint(ALLOC_IN_RC(int_rcx_reg));
5203 match(RegI);
5204 match(rRegI);
5205
5206 format %{ "RCX" %}
5207 interface(REG_INTER);
5208 %}
5209
5210 operand rdx_RegI()
5211 %{
5212 constraint(ALLOC_IN_RC(int_rdx_reg));
5213 match(RegI);
5214 match(rRegI);
5215
5216 format %{ "RDX" %}
5217 interface(REG_INTER);
5218 %}
5219
5220 operand rdi_RegI()
5221 %{
5222 constraint(ALLOC_IN_RC(int_rdi_reg));
5223 match(RegI);
5224 match(rRegI);
5225
5226 format %{ "RDI" %}
5227 interface(REG_INTER);
5228 %}
5229
5230 operand no_rax_rdx_RegI()
5231 %{
5232 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5233 match(RegI);
5234 match(rbx_RegI);
5235 match(rcx_RegI);
5236 match(rdi_RegI);
5237
5238 format %{ %}
5239 interface(REG_INTER);
5240 %}
5241
5242 operand no_rbp_r13_RegI()
5243 %{
5244 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5245 match(RegI);
5246 match(rRegI);
5247 match(rax_RegI);
5248 match(rbx_RegI);
5249 match(rcx_RegI);
5250 match(rdx_RegI);
5251 match(rdi_RegI);
5252
5253 format %{ %}
5254 interface(REG_INTER);
5255 %}
5256
5257 // Pointer Register
5258 operand any_RegP()
5259 %{
5260 constraint(ALLOC_IN_RC(any_reg));
5261 match(RegP);
5262 match(rax_RegP);
5263 match(rbx_RegP);
5264 match(rdi_RegP);
5265 match(rsi_RegP);
5266 match(rbp_RegP);
5267 match(r15_RegP);
5268 match(rRegP);
5269
5270 format %{ %}
5271 interface(REG_INTER);
5272 %}
5273
5274 operand rRegP()
5275 %{
5276 constraint(ALLOC_IN_RC(ptr_reg));
5277 match(RegP);
5278 match(rax_RegP);
5279 match(rbx_RegP);
5280 match(rdi_RegP);
5281 match(rsi_RegP);
5282 match(rbp_RegP); // See Q&A below about
5283 match(r15_RegP); // r15_RegP and rbp_RegP.
5284
5285 format %{ %}
5286 interface(REG_INTER);
5287 %}
5288
5289 operand rRegN() %{
5290 constraint(ALLOC_IN_RC(int_reg));
5291 match(RegN);
5292
5293 format %{ %}
5294 interface(REG_INTER);
5295 %}
5296
5297 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5298 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be chosen
// by the allocator as an output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
5307
5308 // This operand is not allowed to use RBP even if
5309 // RBP is not used to hold the frame pointer.
5310 operand no_rbp_RegP()
5311 %{
5312 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5313 match(RegP);
5314 match(rbx_RegP);
5315 match(rsi_RegP);
5316 match(rdi_RegP);
5317
5318 format %{ %}
5319 interface(REG_INTER);
5320 %}
5321
5322 // Special Registers
5323 // Return a pointer value
5324 operand rax_RegP()
5325 %{
5326 constraint(ALLOC_IN_RC(ptr_rax_reg));
5327 match(RegP);
5328 match(rRegP);
5329
5330 format %{ %}
5331 interface(REG_INTER);
5332 %}
5333
5334 // Special Registers
5335 // Return a compressed pointer value
5336 operand rax_RegN()
5337 %{
5338 constraint(ALLOC_IN_RC(int_rax_reg));
5339 match(RegN);
5340 match(rRegN);
5341
5342 format %{ %}
5343 interface(REG_INTER);
5344 %}
5345
5346 // Used in AtomicAdd
5347 operand rbx_RegP()
5348 %{
5349 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5350 match(RegP);
5351 match(rRegP);
5352
5353 format %{ %}
5354 interface(REG_INTER);
5355 %}
5356
5357 operand rsi_RegP()
5358 %{
5359 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5360 match(RegP);
5361 match(rRegP);
5362
5363 format %{ %}
5364 interface(REG_INTER);
5365 %}
5366
5367 operand rbp_RegP()
5368 %{
5369 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5370 match(RegP);
5371 match(rRegP);
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 // Used in rep stosq
5378 operand rdi_RegP()
5379 %{
5380 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5381 match(RegP);
5382 match(rRegP);
5383
5384 format %{ %}
5385 interface(REG_INTER);
5386 %}
5387
5388 operand r15_RegP()
5389 %{
5390 constraint(ALLOC_IN_RC(ptr_r15_reg));
5391 match(RegP);
5392 match(rRegP);
5393
5394 format %{ %}
5395 interface(REG_INTER);
5396 %}
5397
5398 operand rRegL()
5399 %{
5400 constraint(ALLOC_IN_RC(long_reg));
5401 match(RegL);
5402 match(rax_RegL);
5403 match(rdx_RegL);
5404
5405 format %{ %}
5406 interface(REG_INTER);
5407 %}
5408
5409 // Special Registers
5410 operand no_rax_rdx_RegL()
5411 %{
5412 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5413 match(RegL);
5414 match(rRegL);
5415
5416 format %{ %}
5417 interface(REG_INTER);
5418 %}
5419
5420 operand rax_RegL()
5421 %{
5422 constraint(ALLOC_IN_RC(long_rax_reg));
5423 match(RegL);
5424 match(rRegL);
5425
5426 format %{ "RAX" %}
5427 interface(REG_INTER);
5428 %}
5429
5430 operand rcx_RegL()
5431 %{
5432 constraint(ALLOC_IN_RC(long_rcx_reg));
5433 match(RegL);
5434 match(rRegL);
5435
5436 format %{ %}
5437 interface(REG_INTER);
5438 %}
5439
5440 operand rdx_RegL()
5441 %{
5442 constraint(ALLOC_IN_RC(long_rdx_reg));
5443 match(RegL);
5444 match(rRegL);
5445
5446 format %{ %}
5447 interface(REG_INTER);
5448 %}
5449
5450 operand r11_RegL()
5451 %{
5452 constraint(ALLOC_IN_RC(long_r11_reg));
5453 match(RegL);
5454 match(rRegL);
5455
5456 format %{ %}
5457 interface(REG_INTER);
5458 %}
5459
5460 operand no_rbp_r13_RegL()
5461 %{
5462 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5463 match(RegL);
5464 match(rRegL);
5465 match(rax_RegL);
5466 match(rcx_RegL);
5467 match(rdx_RegL);
5468
5469 format %{ %}
5470 interface(REG_INTER);
5471 %}
5472
5473 // Flags register, used as output of compare instructions
5474 operand rFlagsReg()
5475 %{
5476 constraint(ALLOC_IN_RC(int_flags));
5477 match(RegFlags);
5478
5479 format %{ "RFLAGS" %}
5480 interface(REG_INTER);
5481 %}
5482
5483 // Flags register, used as output of FLOATING POINT compare instructions
5484 operand rFlagsRegU()
5485 %{
5486 constraint(ALLOC_IN_RC(int_flags));
5487 match(RegFlags);
5488
5489 format %{ "RFLAGS_U" %}
5490 interface(REG_INTER);
5491 %}
5492
5493 operand rFlagsRegUCF() %{
5494 constraint(ALLOC_IN_RC(int_flags));
5495 match(RegFlags);
5496 predicate(false);
5497
5498 format %{ "RFLAGS_U_CF" %}
5499 interface(REG_INTER);
5500 %}
5501
5502 // Float register operands
5503 operand regF() %{
5504 constraint(ALLOC_IN_RC(float_reg));
5505 match(RegF);
5506
5507 format %{ %}
5508 interface(REG_INTER);
5509 %}
5510
5511 // Float register operands
5512 operand legRegF() %{
5513 constraint(ALLOC_IN_RC(float_reg_legacy));
5514 match(RegF);
5515
5516 format %{ %}
5517 interface(REG_INTER);
5518 %}
5519
5520 // Float register operands
5521 operand vlRegF() %{
5522 constraint(ALLOC_IN_RC(float_reg_vl));
5523 match(RegF);
5524
5525 format %{ %}
5526 interface(REG_INTER);
5527 %}
5528
5529 // Double register operands
5530 operand regD() %{
5531 constraint(ALLOC_IN_RC(double_reg));
5532 match(RegD);
5533
5534 format %{ %}
5535 interface(REG_INTER);
5536 %}
5537
5538 // Double register operands
5539 operand legRegD() %{
5540 constraint(ALLOC_IN_RC(double_reg_legacy));
5541 match(RegD);
5542
5543 format %{ %}
5544 interface(REG_INTER);
5545 %}
5546
5547 // Double register operands
5548 operand vlRegD() %{
5549 constraint(ALLOC_IN_RC(double_reg_vl));
5550 match(RegD);
5551
5552 format %{ %}
5553 interface(REG_INTER);
5554 %}
5555
5556 //----------Memory Operands----------------------------------------------------
5557 // Direct Memory Operand
5558 // operand direct(immP addr)
5559 // %{
5560 // match(addr);
5561
5562 // format %{ "[$addr]" %}
5563 // interface(MEMORY_INTER) %{
5564 // base(0xFFFFFFFF);
5565 // index(0x4);
5566 // scale(0x0);
5567 // disp($addr);
5568 // %}
5569 // %}
5570
5571 // Indirect Memory Operand
5572 operand indirect(any_RegP reg)
5573 %{
5574 constraint(ALLOC_IN_RC(ptr_reg));
5575 match(reg);
5576
5577 format %{ "[$reg]" %}
5578 interface(MEMORY_INTER) %{
5579 base($reg);
5580 index(0x4);
5581 scale(0x0);
5582 disp(0x0);
5583 %}
5584 %}
5585
5586 // Indirect Memory Plus Short Offset Operand
5587 operand indOffset8(any_RegP reg, immL8 off)
5588 %{
5589 constraint(ALLOC_IN_RC(ptr_reg));
5590 match(AddP reg off);
5591
5592 format %{ "[$reg + $off (8-bit)]" %}
5593 interface(MEMORY_INTER) %{
5594 base($reg);
5595 index(0x4);
5596 scale(0x0);
5597 disp($off);
5598 %}
5599 %}
5600
5601 // Indirect Memory Plus Long Offset Operand
5602 operand indOffset32(any_RegP reg, immL32 off)
5603 %{
5604 constraint(ALLOC_IN_RC(ptr_reg));
5605 match(AddP reg off);
5606
5607 format %{ "[$reg + $off (32-bit)]" %}
5608 interface(MEMORY_INTER) %{
5609 base($reg);
5610 index(0x4);
5611 scale(0x0);
5612 disp($off);
5613 %}
5614 %}
5615
5616 // Indirect Memory Plus Index Register Plus Offset Operand
5617 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5618 %{
5619 constraint(ALLOC_IN_RC(ptr_reg));
5620 match(AddP (AddP reg lreg) off);
5621
5622 op_cost(10);
5623 format %{"[$reg + $off + $lreg]" %}
5624 interface(MEMORY_INTER) %{
5625 base($reg);
5626 index($lreg);
5627 scale(0x0);
5628 disp($off);
5629 %}
5630 %}
5631
5632 // Indirect Memory Plus Index Register Plus Offset Operand
5633 operand indIndex(any_RegP reg, rRegL lreg)
5634 %{
5635 constraint(ALLOC_IN_RC(ptr_reg));
5636 match(AddP reg lreg);
5637
5638 op_cost(10);
5639 format %{"[$reg + $lreg]" %}
5640 interface(MEMORY_INTER) %{
5641 base($reg);
5642 index($lreg);
5643 scale(0x0);
5644 disp(0x0);
5645 %}
5646 %}
5647
5648 // Indirect Memory Times Scale Plus Index Register
5649 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5650 %{
5651 constraint(ALLOC_IN_RC(ptr_reg));
5652 match(AddP reg (LShiftL lreg scale));
5653
5654 op_cost(10);
5655 format %{"[$reg + $lreg << $scale]" %}
5656 interface(MEMORY_INTER) %{
5657 base($reg);
5658 index($lreg);
5659 scale($scale);
5660 disp(0x0);
5661 %}
5662 %}
5663
5664 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5665 %{
5666 constraint(ALLOC_IN_RC(ptr_reg));
5667 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5668 match(AddP reg (LShiftL (ConvI2L idx) scale));
5669
5670 op_cost(10);
5671 format %{"[$reg + pos $idx << $scale]" %}
5672 interface(MEMORY_INTER) %{
5673 base($reg);
5674 index($idx);
5675 scale($scale);
5676 disp(0x0);
5677 %}
5678 %}
5679
5680 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5681 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5682 %{
5683 constraint(ALLOC_IN_RC(ptr_reg));
5684 match(AddP (AddP reg (LShiftL lreg scale)) off);
5685
5686 op_cost(10);
5687 format %{"[$reg + $off + $lreg << $scale]" %}
5688 interface(MEMORY_INTER) %{
5689 base($reg);
5690 index($lreg);
5691 scale($scale);
5692 disp($off);
5693 %}
5694 %}
5695
5696 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5697 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5698 %{
5699 constraint(ALLOC_IN_RC(ptr_reg));
5700 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5701 match(AddP (AddP reg (ConvI2L idx)) off);
5702
5703 op_cost(10);
5704 format %{"[$reg + $off + $idx]" %}
5705 interface(MEMORY_INTER) %{
5706 base($reg);
5707 index($idx);
5708 scale(0x0);
5709 disp($off);
5710 %}
5711 %}
5712
5713 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5714 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5715 %{
5716 constraint(ALLOC_IN_RC(ptr_reg));
5717 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5718 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5719
5720 op_cost(10);
5721 format %{"[$reg + $off + $idx << $scale]" %}
5722 interface(MEMORY_INTER) %{
5723 base($reg);
5724 index($idx);
5725 scale($scale);
5726 disp($off);
5727 %}
5728 %}
5729
5730 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even with CompressedOops::base() == nullptr.
5733 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5734 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5735 constraint(ALLOC_IN_RC(ptr_reg));
5736 match(AddP (DecodeN reg) off);
5737
5738 op_cost(10);
5739 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5740 interface(MEMORY_INTER) %{
5741 base(0xc); // R12
5742 index($reg);
5743 scale(0x3);
5744 disp($off);
5745 %}
5746 %}
5747
5748 // Indirect Memory Operand
5749 operand indirectNarrow(rRegN reg)
5750 %{
5751 predicate(CompressedOops::shift() == 0);
5752 constraint(ALLOC_IN_RC(ptr_reg));
5753 match(DecodeN reg);
5754
5755 format %{ "[$reg]" %}
5756 interface(MEMORY_INTER) %{
5757 base($reg);
5758 index(0x4);
5759 scale(0x0);
5760 disp(0x0);
5761 %}
5762 %}
5763
5764 // Indirect Memory Plus Short Offset Operand
5765 operand indOffset8Narrow(rRegN reg, immL8 off)
5766 %{
5767 predicate(CompressedOops::shift() == 0);
5768 constraint(ALLOC_IN_RC(ptr_reg));
5769 match(AddP (DecodeN reg) off);
5770
5771 format %{ "[$reg + $off (8-bit)]" %}
5772 interface(MEMORY_INTER) %{
5773 base($reg);
5774 index(0x4);
5775 scale(0x0);
5776 disp($off);
5777 %}
5778 %}
5779
5780 // Indirect Memory Plus Long Offset Operand
5781 operand indOffset32Narrow(rRegN reg, immL32 off)
5782 %{
5783 predicate(CompressedOops::shift() == 0);
5784 constraint(ALLOC_IN_RC(ptr_reg));
5785 match(AddP (DecodeN reg) off);
5786
5787 format %{ "[$reg + $off (32-bit)]" %}
5788 interface(MEMORY_INTER) %{
5789 base($reg);
5790 index(0x4);
5791 scale(0x0);
5792 disp($off);
5793 %}
5794 %}
5795
5796 // Indirect Memory Plus Index Register Plus Offset Operand
5797 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5798 %{
5799 predicate(CompressedOops::shift() == 0);
5800 constraint(ALLOC_IN_RC(ptr_reg));
5801 match(AddP (AddP (DecodeN reg) lreg) off);
5802
5803 op_cost(10);
5804 format %{"[$reg + $off + $lreg]" %}
5805 interface(MEMORY_INTER) %{
5806 base($reg);
5807 index($lreg);
5808 scale(0x0);
5809 disp($off);
5810 %}
5811 %}
5812
5813 // Indirect Memory Plus Index Register Plus Offset Operand
5814 operand indIndexNarrow(rRegN reg, rRegL lreg)
5815 %{
5816 predicate(CompressedOops::shift() == 0);
5817 constraint(ALLOC_IN_RC(ptr_reg));
5818 match(AddP (DecodeN reg) lreg);
5819
5820 op_cost(10);
5821 format %{"[$reg + $lreg]" %}
5822 interface(MEMORY_INTER) %{
5823 base($reg);
5824 index($lreg);
5825 scale(0x0);
5826 disp(0x0);
5827 %}
5828 %}
5829
5830 // Indirect Memory Times Scale Plus Index Register
5831 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5832 %{
5833 predicate(CompressedOops::shift() == 0);
5834 constraint(ALLOC_IN_RC(ptr_reg));
5835 match(AddP (DecodeN reg) (LShiftL lreg scale));
5836
5837 op_cost(10);
5838 format %{"[$reg + $lreg << $scale]" %}
5839 interface(MEMORY_INTER) %{
5840 base($reg);
5841 index($lreg);
5842 scale($scale);
5843 disp(0x0);
5844 %}
5845 %}
5846
5847 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5848 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5849 %{
5850 predicate(CompressedOops::shift() == 0);
5851 constraint(ALLOC_IN_RC(ptr_reg));
5852 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5853
5854 op_cost(10);
5855 format %{"[$reg + $off + $lreg << $scale]" %}
5856 interface(MEMORY_INTER) %{
5857 base($reg);
5858 index($lreg);
5859 scale($scale);
5860 disp($off);
5861 %}
5862 %}
5863
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5865 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5866 %{
5867 constraint(ALLOC_IN_RC(ptr_reg));
5868 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5869 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5870
5871 op_cost(10);
5872 format %{"[$reg + $off + $idx]" %}
5873 interface(MEMORY_INTER) %{
5874 base($reg);
5875 index($idx);
5876 scale(0x0);
5877 disp($off);
5878 %}
5879 %}
5880
5881 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5882 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5883 %{
5884 constraint(ALLOC_IN_RC(ptr_reg));
5885 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5886 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5887
5888 op_cost(10);
5889 format %{"[$reg + $off + $idx << $scale]" %}
5890 interface(MEMORY_INTER) %{
5891 base($reg);
5892 index($idx);
5893 scale($scale);
5894 disp($off);
5895 %}
5896 %}
5897
5898 //----------Special Memory Operands--------------------------------------------
5899 // Stack Slot Operand - This operand is used for loading and storing temporary
5900 // values on the stack where a match requires a value to
5901 // flow through memory.
5902 operand stackSlotP(sRegP reg)
5903 %{
5904 constraint(ALLOC_IN_RC(stack_slots));
5905 // No match rule because this operand is only generated in matching
5906
5907 format %{ "[$reg]" %}
5908 interface(MEMORY_INTER) %{
5909 base(0x4); // RSP
5910 index(0x4); // No Index
5911 scale(0x0); // No Scale
5912 disp($reg); // Stack Offset
5913 %}
5914 %}
5915
5916 operand stackSlotI(sRegI reg)
5917 %{
5918 constraint(ALLOC_IN_RC(stack_slots));
5919 // No match rule because this operand is only generated in matching
5920
5921 format %{ "[$reg]" %}
5922 interface(MEMORY_INTER) %{
5923 base(0x4); // RSP
5924 index(0x4); // No Index
5925 scale(0x0); // No Scale
5926 disp($reg); // Stack Offset
5927 %}
5928 %}
5929
5930 operand stackSlotF(sRegF reg)
5931 %{
5932 constraint(ALLOC_IN_RC(stack_slots));
5933 // No match rule because this operand is only generated in matching
5934
5935 format %{ "[$reg]" %}
5936 interface(MEMORY_INTER) %{
5937 base(0x4); // RSP
5938 index(0x4); // No Index
5939 scale(0x0); // No Scale
5940 disp($reg); // Stack Offset
5941 %}
5942 %}
5943
5944 operand stackSlotD(sRegD reg)
5945 %{
5946 constraint(ALLOC_IN_RC(stack_slots));
5947 // No match rule because this operand is only generated in matching
5948
5949 format %{ "[$reg]" %}
5950 interface(MEMORY_INTER) %{
5951 base(0x4); // RSP
5952 index(0x4); // No Index
5953 scale(0x0); // No Scale
5954 disp($reg); // Stack Offset
5955 %}
%}

operand stackSlotL(sRegL reg)
5958 %{
5959 constraint(ALLOC_IN_RC(stack_slots));
5960 // No match rule because this operand is only generated in matching
5961
5962 format %{ "[$reg]" %}
5963 interface(MEMORY_INTER) %{
5964 base(0x4); // RSP
5965 index(0x4); // No Index
5966 scale(0x0); // No Scale
5967 disp($reg); // Stack Offset
5968 %}
5969 %}
5970
5971 //----------Conditional Branch Operands----------------------------------------
5972 // Comparison Op - This is the operation of the comparison, and is limited to
5973 // the following set of codes:
5974 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5975 //
5976 // Other attributes of the comparison, such as unsignedness, are specified
5977 // by the comparison instruction that sets a condition code flags register.
5978 // That result is represented by a flags operand whose subtype is appropriate
5979 // to the unsignedness (etc.) of the comparison.
5980 //
5981 // Later, the instruction which matches both the Comparison Op (a Bool) and
5982 // the flags (produced by the Cmp) specifies the coding of the comparison op
5983 // by matching a specific subtype of Bool operand below, such as cmpOpU.
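//
// As an illustrative sketch only (the real branch definitions appear later
// in this file), a conditional branch consumes both pieces roughly like this:
//
//   instruct jmpCon_sketch(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);
//     effect(USE labl);
//     ins_encode %{
//       // $cop$$cmpcode is supplied by the COND_INTER entries below
//       __ jcc((Assembler::Condition)($cop$$cmpcode), *($labl$$label));
//     %}
//     ins_pipe(pipe_jcc);
//   %}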
5984
5985 // Comparison Code
5986 operand cmpOp()
5987 %{
5988 match(Bool);
5989
5990 format %{ "" %}
5991 interface(COND_INTER) %{
5992 equal(0x4, "e");
5993 not_equal(0x5, "ne");
5994 less(0xC, "l");
5995 greater_equal(0xD, "ge");
5996 less_equal(0xE, "le");
5997 greater(0xF, "g");
5998 overflow(0x0, "o");
5999 no_overflow(0x1, "no");
6000 %}
6001 %}
6002
6003 // Comparison Code, unsigned compare. Used by FP also, with
6004 // C2 (unordered) turned into GT or LT already. The other bits
6005 // C0 and C3 are turned into Carry & Zero flags.
6006 operand cmpOpU()
6007 %{
6008 match(Bool);
6009
6010 format %{ "" %}
6011 interface(COND_INTER) %{
6012 equal(0x4, "e");
6013 not_equal(0x5, "ne");
6014 less(0x2, "b");
6015 greater_equal(0x3, "ae");
6016 less_equal(0x6, "be");
6017 greater(0x7, "a");
6018 overflow(0x0, "o");
6019 no_overflow(0x1, "no");
6020 %}
6021 %}
6022
6023
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6027 operand cmpOpUCF() %{
6028 match(Bool);
6029 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6030 n->as_Bool()->_test._test == BoolTest::ge ||
6031 n->as_Bool()->_test._test == BoolTest::le ||
6032 n->as_Bool()->_test._test == BoolTest::gt ||
6033 n->in(1)->in(1) == n->in(1)->in(2));
6034 format %{ "" %}
6035 interface(COND_INTER) %{
6036 equal(0xb, "np");
6037 not_equal(0xa, "p");
6038 less(0x2, "b");
6039 greater_equal(0x3, "ae");
6040 less_equal(0x6, "be");
6041 greater(0x7, "a");
6042 overflow(0x0, "o");
6043 no_overflow(0x1, "no");
6044 %}
6045 %}
6046
6047
6048 // Floating comparisons that can be fixed up with extra conditional jumps
6049 operand cmpOpUCF2() %{
6050 match(Bool);
6051 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6052 n->as_Bool()->_test._test == BoolTest::eq) &&
6053 n->in(1)->in(1) != n->in(1)->in(2));
6054 format %{ "" %}
6055 interface(COND_INTER) %{
6056 equal(0x4, "e");
6057 not_equal(0x5, "ne");
6058 less(0x2, "b");
6059 greater_equal(0x3, "ae");
6060 less_equal(0x6, "be");
6061 greater(0x7, "a");
6062 overflow(0x0, "o");
6063 no_overflow(0x1, "no");
6064 %}
6065 %}
6066
// Operands for bound floating-point register arguments
6068 operand rxmm0() %{
6069 constraint(ALLOC_IN_RC(xmm0_reg));
6070 match(VecX);
6071 format%{%}
6072 interface(REG_INTER);
6073 %}
6074
6075 // Vectors
6076
6077 // Dummy generic vector class. Should be used for all vector operands.
6078 // Replaced with vec[SDXYZ] during post-selection pass.
6079 operand vec() %{
6080 constraint(ALLOC_IN_RC(dynamic));
6081 match(VecX);
6082 match(VecY);
6083 match(VecZ);
6084 match(VecS);
6085 match(VecD);
6086
6087 format %{ %}
6088 interface(REG_INTER);
6089 %}
6090
6091 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6092 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6093 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6094 // runtime code generation via reg_class_dynamic.
6095 operand legVec() %{
6096 constraint(ALLOC_IN_RC(dynamic));
6097 match(VecX);
6098 match(VecY);
6099 match(VecZ);
6100 match(VecS);
6101 match(VecD);
6102
6103 format %{ %}
6104 interface(REG_INTER);
6105 %}
6106
6107 // Replaces vec during post-selection cleanup. See above.
6108 operand vecS() %{
6109 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6110 match(VecS);
6111
6112 format %{ %}
6113 interface(REG_INTER);
6114 %}
6115
6116 // Replaces legVec during post-selection cleanup. See above.
6117 operand legVecS() %{
6118 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6119 match(VecS);
6120
6121 format %{ %}
6122 interface(REG_INTER);
6123 %}
6124
6125 // Replaces vec during post-selection cleanup. See above.
6126 operand vecD() %{
6127 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6128 match(VecD);
6129
6130 format %{ %}
6131 interface(REG_INTER);
6132 %}
6133
6134 // Replaces legVec during post-selection cleanup. See above.
6135 operand legVecD() %{
6136 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6137 match(VecD);
6138
6139 format %{ %}
6140 interface(REG_INTER);
6141 %}
6142
6143 // Replaces vec during post-selection cleanup. See above.
6144 operand vecX() %{
6145 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6146 match(VecX);
6147
6148 format %{ %}
6149 interface(REG_INTER);
6150 %}
6151
6152 // Replaces legVec during post-selection cleanup. See above.
6153 operand legVecX() %{
6154 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6155 match(VecX);
6156
6157 format %{ %}
6158 interface(REG_INTER);
6159 %}
6160
6161 // Replaces vec during post-selection cleanup. See above.
6162 operand vecY() %{
6163 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6164 match(VecY);
6165
6166 format %{ %}
6167 interface(REG_INTER);
6168 %}
6169
6170 // Replaces legVec during post-selection cleanup. See above.
6171 operand legVecY() %{
6172 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6173 match(VecY);
6174
6175 format %{ %}
6176 interface(REG_INTER);
6177 %}
6178
6179 // Replaces vec during post-selection cleanup. See above.
6180 operand vecZ() %{
6181 constraint(ALLOC_IN_RC(vectorz_reg));
6182 match(VecZ);
6183
6184 format %{ %}
6185 interface(REG_INTER);
6186 %}
6187
6188 // Replaces legVec during post-selection cleanup. See above.
6189 operand legVecZ() %{
6190 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6191 match(VecZ);
6192
6193 format %{ %}
6194 interface(REG_INTER);
6195 %}
6196
6197 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6199 // instruction definitions by not requiring the AD writer to specify separate
6200 // instructions for every form of operand when the instruction accepts
6201 // multiple operand types with the same basic encoding and format. The classic
6202 // case of this is memory operands.
6203
6204 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6205 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6206 indCompressedOopOffset,
6207 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6208 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6209 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
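
// As an illustrative sketch only: with the "memory" opclass above, a single
// definition such as
//
//   instruct loadI_sketch(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// matches every addressing form in the opclass, instead of requiring one
// instruct per memory operand.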
6210
6211 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6213 pipeline %{
6214
6215 //----------ATTRIBUTES---------------------------------------------------------
6216 attributes %{
variable_size_instructions;        // Variable-size instructions
max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
instruction_unit_size = 1;         // An instruction is 1 byte long
6220 instruction_fetch_unit_size = 16; // The processor fetches one line
6221 instruction_fetch_units = 1; // of 16 bytes
6222 %}
6223
6224 //----------RESOURCES----------------------------------------------------------
6225 // Resources are the functional units available to the machine
6226
6227 // Generic P2/P3 pipeline
6228 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6229 // 3 instructions decoded per cycle.
6230 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6232 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6233 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6234 BR, FPU,
6235 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6236
6237 //----------PIPELINE DESCRIPTION-----------------------------------------------
6238 // Pipeline Description specifies the stages in the machine's pipeline
6239
6240 // Generic P2/P3 pipeline
6241 pipe_desc(S0, S1, S2, S3, S4, S5);
6242
6243 //----------PIPELINE CLASSES---------------------------------------------------
6244 // Pipeline Classes describe the stages in which input and output are
6245 // referenced by the hardware pipeline.
6246
6247 // Naming convention: ialu or fpu
6248 // Then: _reg
6249 // Then: _reg if there is a 2nd register
6250 // Then: _long if it's a pair of instructions implementing a long
6251 // Then: _fat if it requires the big decoder
6252 // Or: _mem if it requires the big decoder and a memory unit.
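//
// For example, "ialu_reg_mem" below names an integer ALU operation whose
// second operand comes from memory, so it claims the big decoder (D0) and
// a memory unit (MEM) in addition to an ALU.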
6253
6254 // Integer ALU reg operation
6255 pipe_class ialu_reg(rRegI dst)
6256 %{
6257 single_instruction;
6258 dst : S4(write);
6259 dst : S3(read);
6260 DECODE : S0; // any decoder
6261 ALU : S3; // any alu
6262 %}
6263
6264 // Long ALU reg operation
6265 pipe_class ialu_reg_long(rRegL dst)
6266 %{
6267 instruction_count(2);
6268 dst : S4(write);
6269 dst : S3(read);
6270 DECODE : S0(2); // any 2 decoders
6271 ALU : S3(2); // both alus
6272 %}
6273
6274 // Integer ALU reg operation using big decoder
6275 pipe_class ialu_reg_fat(rRegI dst)
6276 %{
6277 single_instruction;
6278 dst : S4(write);
6279 dst : S3(read);
6280 D0 : S0; // big decoder only
6281 ALU : S3; // any alu
6282 %}
6283
6284 // Integer ALU reg-reg operation
6285 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6286 %{
6287 single_instruction;
6288 dst : S4(write);
6289 src : S3(read);
6290 DECODE : S0; // any decoder
6291 ALU : S3; // any alu
6292 %}
6293
// Integer ALU reg-reg operation using the big decoder
6295 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6296 %{
6297 single_instruction;
6298 dst : S4(write);
6299 src : S3(read);
6300 D0 : S0; // big decoder only
6301 ALU : S3; // any alu
6302 %}
6303
6304 // Integer ALU reg-mem operation
6305 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6306 %{
6307 single_instruction;
6308 dst : S5(write);
6309 mem : S3(read);
6310 D0 : S0; // big decoder only
6311 ALU : S4; // any alu
6312 MEM : S3; // any mem
6313 %}
6314
6315 // Integer mem operation (prefetch)
6316 pipe_class ialu_mem(memory mem)
6317 %{
6318 single_instruction;
6319 mem : S3(read);
6320 D0 : S0; // big decoder only
6321 MEM : S3; // any mem
6322 %}
6323
6324 // Integer Store to Memory
6325 pipe_class ialu_mem_reg(memory mem, rRegI src)
6326 %{
6327 single_instruction;
6328 mem : S3(read);
6329 src : S5(read);
6330 D0 : S0; // big decoder only
6331 ALU : S4; // any alu
6332 MEM : S3;
6333 %}
6334
6335 // // Long Store to Memory
6336 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6337 // %{
6338 // instruction_count(2);
6339 // mem : S3(read);
6340 // src : S5(read);
6341 // D0 : S0(2); // big decoder only; twice
6342 // ALU : S4(2); // any 2 alus
6343 // MEM : S3(2); // Both mems
6344 // %}
6345
6346 // Integer Store to Memory
6347 pipe_class ialu_mem_imm(memory mem)
6348 %{
6349 single_instruction;
6350 mem : S3(read);
6351 D0 : S0; // big decoder only
6352 ALU : S4; // any alu
6353 MEM : S3;
6354 %}
6355
6356 // Integer ALU0 reg-reg operation
6357 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6358 %{
6359 single_instruction;
6360 dst : S4(write);
6361 src : S3(read);
6362 D0 : S0; // Big decoder only
6363 ALU0 : S3; // only alu0
6364 %}
6365
6366 // Integer ALU0 reg-mem operation
6367 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6368 %{
6369 single_instruction;
6370 dst : S5(write);
6371 mem : S3(read);
6372 D0 : S0; // big decoder only
6373 ALU0 : S4; // ALU0 only
6374 MEM : S3; // any mem
6375 %}
6376
6377 // Integer ALU reg-reg operation
6378 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6379 %{
6380 single_instruction;
6381 cr : S4(write);
6382 src1 : S3(read);
6383 src2 : S3(read);
6384 DECODE : S0; // any decoder
6385 ALU : S3; // any alu
6386 %}
6387
6388 // Integer ALU reg-imm operation
6389 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6390 %{
6391 single_instruction;
6392 cr : S4(write);
6393 src1 : S3(read);
6394 DECODE : S0; // any decoder
6395 ALU : S3; // any alu
6396 %}
6397
6398 // Integer ALU reg-mem operation
6399 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6400 %{
6401 single_instruction;
6402 cr : S4(write);
6403 src1 : S3(read);
6404 src2 : S3(read);
6405 D0 : S0; // big decoder only
6406 ALU : S4; // any alu
6407 MEM : S3;
6408 %}
6409
6410 // Conditional move reg-reg
6411 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6412 %{
6413 instruction_count(4);
6414 y : S4(read);
6415 q : S3(read);
6416 p : S3(read);
6417 DECODE : S0(4); // any decoder
6418 %}
6419
6420 // Conditional move reg-reg
6421 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6422 %{
6423 single_instruction;
6424 dst : S4(write);
6425 src : S3(read);
6426 cr : S3(read);
6427 DECODE : S0; // any decoder
6428 %}
6429
6430 // Conditional move reg-mem
6431 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6432 %{
6433 single_instruction;
6434 dst : S4(write);
6435 src : S3(read);
6436 cr : S3(read);
6437 DECODE : S0; // any decoder
6438 MEM : S3;
6439 %}
6440
6441 // Conditional move reg-reg long
6442 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6443 %{
6444 single_instruction;
6445 dst : S4(write);
6446 src : S3(read);
6447 cr : S3(read);
6448 DECODE : S0(2); // any 2 decoders
6449 %}
6450
6451 // Float reg-reg operation
6452 pipe_class fpu_reg(regD dst)
6453 %{
6454 instruction_count(2);
6455 dst : S3(read);
6456 DECODE : S0(2); // any 2 decoders
6457 FPU : S3;
6458 %}
6459
6460 // Float reg-reg operation
6461 pipe_class fpu_reg_reg(regD dst, regD src)
6462 %{
6463 instruction_count(2);
6464 dst : S4(write);
6465 src : S3(read);
6466 DECODE : S0(2); // any 2 decoders
6467 FPU : S3;
6468 %}
6469
6470 // Float reg-reg operation
6471 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6472 %{
6473 instruction_count(3);
6474 dst : S4(write);
6475 src1 : S3(read);
6476 src2 : S3(read);
6477 DECODE : S0(3); // any 3 decoders
6478 FPU : S3(2);
6479 %}
6480
6481 // Float reg-reg operation
6482 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6483 %{
6484 instruction_count(4);
6485 dst : S4(write);
6486 src1 : S3(read);
6487 src2 : S3(read);
6488 src3 : S3(read);
DECODE : S0(4); // any 4 decoders
6490 FPU : S3(2);
6491 %}
6492
6493 // Float reg-reg operation
6494 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6495 %{
6496 instruction_count(4);
6497 dst : S4(write);
6498 src1 : S3(read);
6499 src2 : S3(read);
6500 src3 : S3(read);
6501 DECODE : S1(3); // any 3 decoders
6502 D0 : S0; // Big decoder only
6503 FPU : S3(2);
6504 MEM : S3;
6505 %}
6506
6507 // Float reg-mem operation
6508 pipe_class fpu_reg_mem(regD dst, memory mem)
6509 %{
6510 instruction_count(2);
6511 dst : S5(write);
6512 mem : S3(read);
6513 D0 : S0; // big decoder only
6514 DECODE : S1; // any decoder for FPU POP
6515 FPU : S4;
6516 MEM : S3; // any mem
6517 %}
6518
6519 // Float reg-mem operation
6520 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6521 %{
6522 instruction_count(3);
6523 dst : S5(write);
6524 src1 : S3(read);
6525 mem : S3(read);
6526 D0 : S0; // big decoder only
6527 DECODE : S1(2); // any decoder for FPU POP
6528 FPU : S4;
6529 MEM : S3; // any mem
6530 %}
6531
6532 // Float mem-reg operation
6533 pipe_class fpu_mem_reg(memory mem, regD src)
6534 %{
6535 instruction_count(2);
6536 src : S5(read);
6537 mem : S3(read);
6538 DECODE : S0; // any decoder for FPU PUSH
6539 D0 : S1; // big decoder only
6540 FPU : S4;
6541 MEM : S3; // any mem
6542 %}
6543
6544 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6545 %{
6546 instruction_count(3);
6547 src1 : S3(read);
6548 src2 : S3(read);
6549 mem : S3(read);
6550 DECODE : S0(2); // any decoder for FPU PUSH
6551 D0 : S1; // big decoder only
6552 FPU : S4;
6553 MEM : S3; // any mem
6554 %}
6555
6556 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6557 %{
6558 instruction_count(3);
6559 src1 : S3(read);
6560 src2 : S3(read);
6561 mem : S4(read);
6562 DECODE : S0; // any decoder for FPU PUSH
6563 D0 : S0(2); // big decoder only
6564 FPU : S4;
6565 MEM : S3(2); // any mem
6566 %}
6567
6568 pipe_class fpu_mem_mem(memory dst, memory src1)
6569 %{
6570 instruction_count(2);
6571 src1 : S3(read);
6572 dst : S4(read);
6573 D0 : S0(2); // big decoder only
6574 MEM : S3(2); // any mem
6575 %}
6576
6577 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6578 %{
6579 instruction_count(3);
6580 src1 : S3(read);
6581 src2 : S3(read);
6582 dst : S4(read);
6583 D0 : S0(3); // big decoder only
6584 FPU : S4;
6585 MEM : S3(3); // any mem
6586 %}
6587
6588 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6589 %{
6590 instruction_count(3);
6591 src1 : S4(read);
6592 mem : S4(read);
6593 DECODE : S0; // any decoder for FPU PUSH
6594 D0 : S0(2); // big decoder only
6595 FPU : S4;
6596 MEM : S3(2); // any mem
6597 %}
6598
6599 // Float load constant
6600 pipe_class fpu_reg_con(regD dst)
6601 %{
6602 instruction_count(2);
6603 dst : S5(write);
6604 D0 : S0; // big decoder only for the load
6605 DECODE : S1; // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
6610 // Float load constant
6611 pipe_class fpu_reg_reg_con(regD dst, regD src)
6612 %{
6613 instruction_count(3);
6614 dst : S5(write);
6615 src : S3(read);
6616 D0 : S0; // big decoder only for the load
6617 DECODE : S1(2); // any decoder for FPU POP
6618 FPU : S4;
6619 MEM : S3; // any mem
6620 %}
6621
// Unconditional branch
6623 pipe_class pipe_jmp(label labl)
6624 %{
6625 single_instruction;
6626 BR : S3;
6627 %}
6628
6629 // Conditional branch
6630 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6631 %{
6632 single_instruction;
6633 cr : S1(read);
6634 BR : S3;
6635 %}
6636
6637 // Allocation idiom
6638 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6639 %{
6640 instruction_count(1); force_serialization;
6641 fixed_latency(6);
6642 heap_ptr : S3(read);
6643 DECODE : S0(3);
6644 D0 : S2;
6645 MEM : S3;
6646 ALU : S3(2);
6647 dst : S5(write);
6648 BR : S5;
6649 %}
6650
6651 // Generic big/slow expanded idiom
6652 pipe_class pipe_slow()
6653 %{
6654 instruction_count(10); multiple_bundles; force_serialization;
6655 fixed_latency(100);
6656 D0 : S0(2);
6657 MEM : S3(2);
6658 %}
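
// pipe_slow is the catch-all schedule for big or serializing expansions;
// most XMM, min/max and stub-like rules below simply name it via
// ins_pipe(pipe_slow) rather than modeling their timing precisely.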
6659
6660 // The real do-nothing guy
6661 pipe_class empty()
6662 %{
6663 instruction_count(0);
6664 %}
6665
6666 // Define the class for the Nop node
6667 define
6668 %{
6669 MachNop = empty;
6670 %}
6671
6672 %}
6673
6674 //----------INSTRUCTIONS-------------------------------------------------------
6675 //
6676 // match -- States which machine-independent subtree may be replaced
6677 // by this instruction.
6678 // ins_cost -- The estimated cost of this instruction is used by instruction
6679 // selection to identify a minimum cost tree of machine
6680 // instructions that matches a tree of machine-independent
6681 // instructions.
6682 // format -- A string providing the disassembly for this instruction.
6683 // The value of an instruction's operand may be inserted
6684 // by referring to it with a '$' prefix.
6685 // opcode -- Three instruction opcodes may be provided. These are referred
6686 // to within an encode class as $primary, $secondary, and $tertiary
//                respectively. The primary opcode is commonly used to
6688 // indicate the type of machine instruction, while secondary
6689 // and tertiary are often used for prefix options or addressing
6690 // modes.
6691 // ins_encode -- A list of encode classes with parameters. The encode class
6692 // name must have been defined in an 'enc_class' specification
6693 // in the encode section of the architecture description.
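//
// For illustration, a minimal rule tying these pieces together (a sketch
// modeled on the integer-load rules later in this file, not an extra rule):
//
//   instruct exampleLoadI(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));      // replace the ideal LoadI subtree
//     ins_cost(125);                   // bias selection among alternatives
//     format %{ "movl $dst, $mem" %}   // '$' inserts operand values
//     ins_encode %{
//       __ movl($dst$$Register, $mem$$Address);
//     %}
//     ins_pipe(ialu_reg_mem);          // pipeline class defined above
//   %}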
6694
6695 // ============================================================================
6696
6697 instruct ShouldNotReachHere() %{
6698 match(Halt);
6699 format %{ "stop\t# ShouldNotReachHere" %}
6700 ins_encode %{
6701 if (is_reachable()) {
6702 const char* str = __ code_string(_halt_reason);
6703 __ stop(str);
6704 }
6705 %}
6706 ins_pipe(pipe_slow);
6707 %}
6708
6709 // ============================================================================
6710
6711 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6712 // Load Float
6713 instruct MoveF2VL(vlRegF dst, regF src) %{
6714 match(Set dst src);
6715 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6716 ins_encode %{
6717 ShouldNotReachHere();
6718 %}
6719 ins_pipe( fpu_reg_reg );
6720 %}
6721
6722 // Load Float
6723 instruct MoveF2LEG(legRegF dst, regF src) %{
6724 match(Set dst src);
6725 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6726 ins_encode %{
6727 ShouldNotReachHere();
6728 %}
6729 ins_pipe( fpu_reg_reg );
6730 %}
6731
6732 // Load Float
6733 instruct MoveVL2F(regF dst, vlRegF src) %{
6734 match(Set dst src);
6735 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6736 ins_encode %{
6737 ShouldNotReachHere();
6738 %}
6739 ins_pipe( fpu_reg_reg );
6740 %}
6741
6742 // Load Float
6743 instruct MoveLEG2F(regF dst, legRegF src) %{
6744 match(Set dst src);
6745 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6746 ins_encode %{
6747 ShouldNotReachHere();
6748 %}
6749 ins_pipe( fpu_reg_reg );
6750 %}
6751
6752 // Load Double
6753 instruct MoveD2VL(vlRegD dst, regD src) %{
6754 match(Set dst src);
6755 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6756 ins_encode %{
6757 ShouldNotReachHere();
6758 %}
6759 ins_pipe( fpu_reg_reg );
6760 %}
6761
6762 // Load Double
6763 instruct MoveD2LEG(legRegD dst, regD src) %{
6764 match(Set dst src);
6765 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6766 ins_encode %{
6767 ShouldNotReachHere();
6768 %}
6769 ins_pipe( fpu_reg_reg );
6770 %}
6771
6772 // Load Double
6773 instruct MoveVL2D(regD dst, vlRegD src) %{
6774 match(Set dst src);
6775 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6776 ins_encode %{
6777 ShouldNotReachHere();
6778 %}
6779 ins_pipe( fpu_reg_reg );
6780 %}
6781
6782 // Load Double
6783 instruct MoveLEG2D(regD dst, legRegD src) %{
6784 match(Set dst src);
6785 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6786 ins_encode %{
6787 ShouldNotReachHere();
6788 %}
6789 ins_pipe( fpu_reg_reg );
6790 %}
6791
6792 //----------Load/Store/Move Instructions---------------------------------------
6793 //----------Load Instructions--------------------------------------------------
6794
6795 // Load Byte (8 bit signed)
6796 instruct loadB(rRegI dst, memory mem)
6797 %{
6798 match(Set dst (LoadB mem));
6799
6800 ins_cost(125);
6801 format %{ "movsbl $dst, $mem\t# byte" %}
6802
6803 ins_encode %{
6804 __ movsbl($dst$$Register, $mem$$Address);
6805 %}
6806
6807 ins_pipe(ialu_reg_mem);
6808 %}
6809
6810 // Load Byte (8 bit signed) into Long Register
6811 instruct loadB2L(rRegL dst, memory mem)
6812 %{
6813 match(Set dst (ConvI2L (LoadB mem)));
6814
6815 ins_cost(125);
6816 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6817
6818 ins_encode %{
6819 __ movsbq($dst$$Register, $mem$$Address);
6820 %}
6821
6822 ins_pipe(ialu_reg_mem);
6823 %}
6824
6825 // Load Unsigned Byte (8 bit UNsigned)
6826 instruct loadUB(rRegI dst, memory mem)
6827 %{
6828 match(Set dst (LoadUB mem));
6829
6830 ins_cost(125);
6831 format %{ "movzbl $dst, $mem\t# ubyte" %}
6832
6833 ins_encode %{
6834 __ movzbl($dst$$Register, $mem$$Address);
6835 %}
6836
6837 ins_pipe(ialu_reg_mem);
6838 %}
6839
6840 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6841 instruct loadUB2L(rRegL dst, memory mem)
6842 %{
6843 match(Set dst (ConvI2L (LoadUB mem)));
6844
6845 ins_cost(125);
6846 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6847
6848 ins_encode %{
6849 __ movzbq($dst$$Register, $mem$$Address);
6850 %}
6851
6852 ins_pipe(ialu_reg_mem);
6853 %}
6854
6855 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6856 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6857 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6858 effect(KILL cr);
6859
6860 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6861 "andl $dst, right_n_bits($mask, 8)" %}
6862 ins_encode %{
6863 Register Rdst = $dst$$Register;
6864 __ movzbq(Rdst, $mem$$Address);
6865 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6866 %}
6867 ins_pipe(ialu_reg_mem);
6868 %}
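
// Worked example (illustrative): with mask == 0x1FF the subtree
// ConvI2L(AndI(LoadUB mem, 0x1FF)) reaches this rule; movzbq already
// clears bits 8..63, so the mask is folded to its low 8 bits and the
// andl is emitted with 0x1FF & 0xFF == 0xFF.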
6869
6870 // Load Short (16 bit signed)
6871 instruct loadS(rRegI dst, memory mem)
6872 %{
6873 match(Set dst (LoadS mem));
6874
6875 ins_cost(125);
6876 format %{ "movswl $dst, $mem\t# short" %}
6877
6878 ins_encode %{
6879 __ movswl($dst$$Register, $mem$$Address);
6880 %}
6881
6882 ins_pipe(ialu_reg_mem);
6883 %}
6884
6885 // Load Short (16 bit signed) to Byte (8 bit signed)
6886 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6887 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6888
6889 ins_cost(125);
6890 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6891 ins_encode %{
6892 __ movsbl($dst$$Register, $mem$$Address);
6893 %}
6894 ins_pipe(ialu_reg_mem);
6895 %}
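
// The (x << 24) >> 24 shift pair is the canonical sign-extension idiom for
// a narrowing cast, so this rule in effect matches Java shapes such as
// (byte) aShortValue and folds load, narrowing and sign extension into a
// single movsbl.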
6896
6897 // Load Short (16 bit signed) into Long Register
6898 instruct loadS2L(rRegL dst, memory mem)
6899 %{
6900 match(Set dst (ConvI2L (LoadS mem)));
6901
6902 ins_cost(125);
6903 format %{ "movswq $dst, $mem\t# short -> long" %}
6904
6905 ins_encode %{
6906 __ movswq($dst$$Register, $mem$$Address);
6907 %}
6908
6909 ins_pipe(ialu_reg_mem);
6910 %}
6911
6912 // Load Unsigned Short/Char (16 bit UNsigned)
6913 instruct loadUS(rRegI dst, memory mem)
6914 %{
6915 match(Set dst (LoadUS mem));
6916
6917 ins_cost(125);
6918 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6919
6920 ins_encode %{
6921 __ movzwl($dst$$Register, $mem$$Address);
6922 %}
6923
6924 ins_pipe(ialu_reg_mem);
6925 %}
6926
6927 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6928 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6929 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6930
6931 ins_cost(125);
6932 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6933 ins_encode %{
6934 __ movsbl($dst$$Register, $mem$$Address);
6935 %}
6936 ins_pipe(ialu_reg_mem);
6937 %}
6938
6939 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6940 instruct loadUS2L(rRegL dst, memory mem)
6941 %{
6942 match(Set dst (ConvI2L (LoadUS mem)));
6943
6944 ins_cost(125);
6945 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6946
6947 ins_encode %{
6948 __ movzwq($dst$$Register, $mem$$Address);
6949 %}
6950
6951 ins_pipe(ialu_reg_mem);
6952 %}
6953
6954 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6955 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6956 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6957
6958 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6959 ins_encode %{
6960 __ movzbq($dst$$Register, $mem$$Address);
6961 %}
6962 ins_pipe(ialu_reg_mem);
6963 %}
6964
6965 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
6966 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6967 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6968 effect(KILL cr);
6969
6970 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
6971 "andl $dst, right_n_bits($mask, 16)" %}
6972 ins_encode %{
6973 Register Rdst = $dst$$Register;
6974 __ movzwq(Rdst, $mem$$Address);
6975 __ andl(Rdst, $mask$$constant & right_n_bits(16));
6976 %}
6977 ins_pipe(ialu_reg_mem);
6978 %}
6979
6980 // Load Integer
6981 instruct loadI(rRegI dst, memory mem)
6982 %{
6983 match(Set dst (LoadI mem));
6984
6985 ins_cost(125);
6986 format %{ "movl $dst, $mem\t# int" %}
6987
6988 ins_encode %{
6989 __ movl($dst$$Register, $mem$$Address);
6990 %}
6991
6992 ins_pipe(ialu_reg_mem);
6993 %}
6994
6995 // Load Integer (32 bit signed) to Byte (8 bit signed)
6996 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6997 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6998
6999 ins_cost(125);
7000 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7001 ins_encode %{
7002 __ movsbl($dst$$Register, $mem$$Address);
7003 %}
7004 ins_pipe(ialu_reg_mem);
7005 %}
7006
7007 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7008 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7009 match(Set dst (AndI (LoadI mem) mask));
7010
7011 ins_cost(125);
7012 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7013 ins_encode %{
7014 __ movzbl($dst$$Register, $mem$$Address);
7015 %}
7016 ins_pipe(ialu_reg_mem);
7017 %}
7018
7019 // Load Integer (32 bit signed) to Short (16 bit signed)
7020 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7021 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7022
7023 ins_cost(125);
7024 format %{ "movswl $dst, $mem\t# int -> short" %}
7025 ins_encode %{
7026 __ movswl($dst$$Register, $mem$$Address);
7027 %}
7028 ins_pipe(ialu_reg_mem);
7029 %}
7030
7031 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7032 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7033 match(Set dst (AndI (LoadI mem) mask));
7034
7035 ins_cost(125);
7036 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7037 ins_encode %{
7038 __ movzwl($dst$$Register, $mem$$Address);
7039 %}
7040 ins_pipe(ialu_reg_mem);
7041 %}
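
// As with the shift-pair rules above, these AndI rules match the Java
// narrowing idioms on a loaded int: x & 0xFF is the unsigned-byte shape and
// x & 0xFFFF the char shape, each folding into one zero-extending load.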
7042
7043 // Load Integer into Long Register
7044 instruct loadI2L(rRegL dst, memory mem)
7045 %{
7046 match(Set dst (ConvI2L (LoadI mem)));
7047
7048 ins_cost(125);
7049 format %{ "movslq $dst, $mem\t# int -> long" %}
7050
7051 ins_encode %{
7052 __ movslq($dst$$Register, $mem$$Address);
7053 %}
7054
7055 ins_pipe(ialu_reg_mem);
7056 %}
7057
7058 // Load Integer with mask 0xFF into Long Register
7059 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7060 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7061
7062 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7063 ins_encode %{
7064 __ movzbq($dst$$Register, $mem$$Address);
7065 %}
7066 ins_pipe(ialu_reg_mem);
7067 %}
7068
7069 // Load Integer with mask 0xFFFF into Long Register
7070 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7071 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7072
7073 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7074 ins_encode %{
7075 __ movzwq($dst$$Register, $mem$$Address);
7076 %}
7077 ins_pipe(ialu_reg_mem);
7078 %}
7079
7080 // Load Integer with a 31-bit mask into Long Register
7081 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7082 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7083 effect(KILL cr);
7084
7085 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7086 "andl $dst, $mask" %}
7087 ins_encode %{
7088 Register Rdst = $dst$$Register;
7089 __ movl(Rdst, $mem$$Address);
7090 __ andl(Rdst, $mask$$constant);
7091 %}
7092 ins_pipe(ialu_reg_mem);
7093 %}
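
// A 31-bit mask guarantees the masked int is non-negative, so the sign
// extension implied by ConvI2L is a no-op: movl (which zero-extends to 64
// bits on x86-64) plus andl is sufficient. Illustrative Java shape:
//   long v = anInt & 0x7FFFFFFF;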
7094
7095 // Load Unsigned Integer into Long Register
7096 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7097 %{
7098 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7099
7100 ins_cost(125);
7101 format %{ "movl $dst, $mem\t# uint -> long" %}
7102
7103 ins_encode %{
7104 __ movl($dst$$Register, $mem$$Address);
7105 %}
7106
7107 ins_pipe(ialu_reg_mem);
7108 %}
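
// Here the 0xFFFFFFFF AndL comes for free: a 32-bit movl implicitly zeroes
// bits 32..63 of its destination. Illustrative Java shape:
//   long v = anInt & 0xFFFFFFFFL;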
7109
7110 // Load Long
7111 instruct loadL(rRegL dst, memory mem)
7112 %{
7113 match(Set dst (LoadL mem));
7114
7115 ins_cost(125);
7116 format %{ "movq $dst, $mem\t# long" %}
7117
7118 ins_encode %{
7119 __ movq($dst$$Register, $mem$$Address);
7120 %}
7121
7122 ins_pipe(ialu_reg_mem); // XXX
7123 %}
7124
7125 // Load Range
7126 instruct loadRange(rRegI dst, memory mem)
7127 %{
7128 match(Set dst (LoadRange mem));
7129
7130 ins_cost(125); // XXX
7131 format %{ "movl $dst, $mem\t# range" %}
7132 ins_encode %{
7133 __ movl($dst$$Register, $mem$$Address);
7134 %}
7135 ins_pipe(ialu_reg_mem);
7136 %}
7137
7138 // Load Pointer
7139 instruct loadP(rRegP dst, memory mem)
7140 %{
7141 match(Set dst (LoadP mem));
7142 predicate(n->as_Load()->barrier_data() == 0);
7143
7144 ins_cost(125); // XXX
7145 format %{ "movq $dst, $mem\t# ptr" %}
7146 ins_encode %{
7147 __ movq($dst$$Register, $mem$$Address);
7148 %}
7149 ins_pipe(ialu_reg_mem); // XXX
7150 %}
7151
7152 // Load Compressed Pointer
7153 instruct loadN(rRegN dst, memory mem)
7154 %{
7155 predicate(n->as_Load()->barrier_data() == 0);
7156 match(Set dst (LoadN mem));
7157
7158 ins_cost(125); // XXX
7159 format %{ "movl $dst, $mem\t# compressed ptr" %}
7160 ins_encode %{
7161 __ movl($dst$$Register, $mem$$Address);
7162 %}
7163 ins_pipe(ialu_reg_mem); // XXX
7164 %}
7165
7166
7167 // Load Klass Pointer
7168 instruct loadKlass(rRegP dst, memory mem)
7169 %{
7170 match(Set dst (LoadKlass mem));
7171
7172 ins_cost(125); // XXX
7173 format %{ "movq $dst, $mem\t# class" %}
7174 ins_encode %{
7175 __ movq($dst$$Register, $mem$$Address);
7176 %}
7177 ins_pipe(ialu_reg_mem); // XXX
7178 %}
7179
7180 // Load narrow Klass Pointer
7181 instruct loadNKlass(rRegN dst, memory mem)
7182 %{
7183 predicate(!UseCompactObjectHeaders);
7184 match(Set dst (LoadNKlass mem));
7185
7186 ins_cost(125); // XXX
7187 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7188 ins_encode %{
7189 __ movl($dst$$Register, $mem$$Address);
7190 %}
7191 ins_pipe(ialu_reg_mem); // XXX
7192 %}
7193
7194 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7195 %{
7196 predicate(UseCompactObjectHeaders);
7197 match(Set dst (LoadNKlass mem));
7198 effect(KILL cr);
7199 ins_cost(125);
7200 format %{
7201 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7202 "shrl $dst, markWord::klass_shift_at_offset"
7203 %}
7204 ins_encode %{
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7212 %}
7213 ins_pipe(ialu_reg_mem);
7214 %}
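
// With UseAPX the encoding above fuses load and shift into one eshrl (the
// APX form takes a memory source and a distinct destination); without APX
// it falls back to the classic movl + shrl pair.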
7215
7216 // Load Float
7217 instruct loadF(regF dst, memory mem)
7218 %{
7219 match(Set dst (LoadF mem));
7220
7221 ins_cost(145); // XXX
7222 format %{ "movss $dst, $mem\t# float" %}
7223 ins_encode %{
7224 __ movflt($dst$$XMMRegister, $mem$$Address);
7225 %}
7226 ins_pipe(pipe_slow); // XXX
7227 %}
7228
7229 // Load Double
7230 instruct loadD_partial(regD dst, memory mem)
7231 %{
7232 predicate(!UseXmmLoadAndClearUpper);
7233 match(Set dst (LoadD mem));
7234
7235 ins_cost(145); // XXX
7236 format %{ "movlpd $dst, $mem\t# double" %}
7237 ins_encode %{
7238 __ movdbl($dst$$XMMRegister, $mem$$Address);
7239 %}
7240 ins_pipe(pipe_slow); // XXX
7241 %}
7242
7243 instruct loadD(regD dst, memory mem)
7244 %{
7245 predicate(UseXmmLoadAndClearUpper);
7246 match(Set dst (LoadD mem));
7247
7248 ins_cost(145); // XXX
7249 format %{ "movsd $dst, $mem\t# double" %}
7250 ins_encode %{
7251 __ movdbl($dst$$XMMRegister, $mem$$Address);
7252 %}
7253 ins_pipe(pipe_slow); // XXX
7254 %}
7255
7256 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7257 %{
7258 match(Set dst con);
7259
7260 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7261
7262 ins_encode %{
7263 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7264 %}
7265
7266 ins_pipe(ialu_reg_fat);
7267 %}
7268
7269 // max = java.lang.Math.max(float a, float b)
7270 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
7271 predicate(VM_Version::supports_avx10_2());
7272 match(Set dst (MaxF a b));
7273 format %{ "maxF $dst, $a, $b" %}
7274 ins_encode %{
7275 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7276 %}
7277 ins_pipe( pipe_slow );
7278 %}
7279
7280 // max = java.lang.Math.max(float a, float b)
7281 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7282 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7283 match(Set dst (MaxF a b));
7284 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7285 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7286 ins_encode %{
7287 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7288 %}
7289 ins_pipe( pipe_slow );
7290 %}
7291
7292 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7293 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7294 match(Set dst (MaxF a b));
7295 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7296
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7298 ins_encode %{
7299 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7300 false /*min*/, true /*single*/);
7301 %}
7302 ins_pipe( pipe_slow );
7303 %}
7304
7305 // max = java.lang.Math.max(double a, double b)
7306 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
7307 predicate(VM_Version::supports_avx10_2());
7308 match(Set dst (MaxD a b));
7309 format %{ "maxD $dst, $a, $b" %}
7310 ins_encode %{
7311 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
7312 %}
7313 ins_pipe( pipe_slow );
7314 %}
7315
7316 // max = java.lang.Math.max(double a, double b)
7317 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7318 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7319 match(Set dst (MaxD a b));
7320 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7321 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7322 ins_encode %{
7323 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7324 %}
7325 ins_pipe( pipe_slow );
7326 %}
7327
7328 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7329 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7330 match(Set dst (MaxD a b));
7331 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7332
7333 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7334 ins_encode %{
7335 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7336 false /*min*/, false /*single*/);
7337 %}
7338 ins_pipe( pipe_slow );
7339 %}
7340
// min = java.lang.Math.min(float a, float b)
7342 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
7343 predicate(VM_Version::supports_avx10_2());
7344 match(Set dst (MinF a b));
7345 format %{ "minF $dst, $a, $b" %}
7346 ins_encode %{
7347 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7348 %}
7349 ins_pipe( pipe_slow );
7350 %}
7351
7352 // min = java.lang.Math.min(float a, float b)
7353 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7354 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7355 match(Set dst (MinF a b));
7356 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7357 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7358 ins_encode %{
7359 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7365 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7366 match(Set dst (MinF a b));
7367 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7368
7369 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7370 ins_encode %{
7371 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7372 true /*min*/, true /*single*/);
7373 %}
7374 ins_pipe( pipe_slow );
7375 %}
7376
// min = java.lang.Math.min(double a, double b)
7378 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
7379 predicate(VM_Version::supports_avx10_2());
7380 match(Set dst (MinD a b));
7381 format %{ "minD $dst, $a, $b" %}
7382 ins_encode %{
7383 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
7384 %}
7385 ins_pipe( pipe_slow );
7386 %}
7387
7388 // min = java.lang.Math.min(double a, double b)
7389 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7390 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7391 match(Set dst (MinD a b));
7392 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7393 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7394 ins_encode %{
7395 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7396 %}
7397 ins_pipe( pipe_slow );
7398 %}
7399
7400 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7401 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7402 match(Set dst (MinD a b));
7403 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7404
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7406 ins_encode %{
7407 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7408 true /*min*/, false /*single*/);
7409 %}
7410 ins_pipe( pipe_slow );
7411 %}
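
// Summary of the Math.min/max selection above: each flavor comes in three
// tiers distinguished by the predicates — a single AVX10.2 instruction
// (eminmaxss/eminmaxsd), a vector blend sequence for plain AVX code
// (vminmax_fp), and a scalar fallback (emit_fp_min_max) for nodes that
// VLoopReductions::is_reduction identifies as reduction accumulators.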
7412
7413 // Load Effective Address
7414 instruct leaP8(rRegP dst, indOffset8 mem)
7415 %{
7416 match(Set dst mem);
7417
7418 ins_cost(110); // XXX
7419 format %{ "leaq $dst, $mem\t# ptr 8" %}
7420 ins_encode %{
7421 __ leaq($dst$$Register, $mem$$Address);
7422 %}
7423 ins_pipe(ialu_reg_reg_fat);
7424 %}
7425
7426 instruct leaP32(rRegP dst, indOffset32 mem)
7427 %{
7428 match(Set dst mem);
7429
7430 ins_cost(110);
7431 format %{ "leaq $dst, $mem\t# ptr 32" %}
7432 ins_encode %{
7433 __ leaq($dst$$Register, $mem$$Address);
7434 %}
7435 ins_pipe(ialu_reg_reg_fat);
7436 %}
7437
7438 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7439 %{
7440 match(Set dst mem);
7441
7442 ins_cost(110);
7443 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7444 ins_encode %{
7445 __ leaq($dst$$Register, $mem$$Address);
7446 %}
7447 ins_pipe(ialu_reg_reg_fat);
7448 %}
7449
7450 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7451 %{
7452 match(Set dst mem);
7453
7454 ins_cost(110);
7455 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7456 ins_encode %{
7457 __ leaq($dst$$Register, $mem$$Address);
7458 %}
7459 ins_pipe(ialu_reg_reg_fat);
7460 %}
7461
7462 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7463 %{
7464 match(Set dst mem);
7465
7466 ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
7468 ins_encode %{
7469 __ leaq($dst$$Register, $mem$$Address);
7470 %}
7471 ins_pipe(ialu_reg_reg_fat);
7472 %}
7473
7474 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7475 %{
7476 match(Set dst mem);
7477
7478 ins_cost(110);
7479 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7480 ins_encode %{
7481 __ leaq($dst$$Register, $mem$$Address);
7482 %}
7483 ins_pipe(ialu_reg_reg_fat);
7484 %}
7485
7486 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7487 %{
7488 match(Set dst mem);
7489
7490 ins_cost(110);
7491 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7492 ins_encode %{
7493 __ leaq($dst$$Register, $mem$$Address);
7494 %}
7495 ins_pipe(ialu_reg_reg_fat);
7496 %}
7497
7498 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7499 %{
7500 match(Set dst mem);
7501
7502 ins_cost(110);
7503 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7504 ins_encode %{
7505 __ leaq($dst$$Register, $mem$$Address);
7506 %}
7507 ins_pipe(ialu_reg_reg_fat);
7508 %}
7509
7510 // Load Effective Address which uses Narrow (32-bits) oop
7511 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7512 %{
7513 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7514 match(Set dst mem);
7515
7516 ins_cost(110);
7517 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7518 ins_encode %{
7519 __ leaq($dst$$Register, $mem$$Address);
7520 %}
7521 ins_pipe(ialu_reg_reg_fat);
7522 %}
7523
7524 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7525 %{
7526 predicate(CompressedOops::shift() == 0);
7527 match(Set dst mem);
7528
7529 ins_cost(110); // XXX
7530 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7531 ins_encode %{
7532 __ leaq($dst$$Register, $mem$$Address);
7533 %}
7534 ins_pipe(ialu_reg_reg_fat);
7535 %}
7536
7537 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7538 %{
7539 predicate(CompressedOops::shift() == 0);
7540 match(Set dst mem);
7541
7542 ins_cost(110);
7543 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7544 ins_encode %{
7545 __ leaq($dst$$Register, $mem$$Address);
7546 %}
7547 ins_pipe(ialu_reg_reg_fat);
7548 %}
7549
7550 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7551 %{
7552 predicate(CompressedOops::shift() == 0);
7553 match(Set dst mem);
7554
7555 ins_cost(110);
7556 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7557 ins_encode %{
7558 __ leaq($dst$$Register, $mem$$Address);
7559 %}
7560 ins_pipe(ialu_reg_reg_fat);
7561 %}
7562
7563 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7564 %{
7565 predicate(CompressedOops::shift() == 0);
7566 match(Set dst mem);
7567
7568 ins_cost(110);
7569 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7570 ins_encode %{
7571 __ leaq($dst$$Register, $mem$$Address);
7572 %}
7573 ins_pipe(ialu_reg_reg_fat);
7574 %}
7575
7576 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7577 %{
7578 predicate(CompressedOops::shift() == 0);
7579 match(Set dst mem);
7580
7581 ins_cost(110);
7582 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7583 ins_encode %{
7584 __ leaq($dst$$Register, $mem$$Address);
7585 %}
7586 ins_pipe(ialu_reg_reg_fat);
7587 %}
7588
7589 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7590 %{
7591 predicate(CompressedOops::shift() == 0);
7592 match(Set dst mem);
7593
7594 ins_cost(110);
7595 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7596 ins_encode %{
7597 __ leaq($dst$$Register, $mem$$Address);
7598 %}
7599 ins_pipe(ialu_reg_reg_fat);
7600 %}
7601
7602 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7603 %{
7604 predicate(CompressedOops::shift() == 0);
7605 match(Set dst mem);
7606
7607 ins_cost(110);
7608 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7609 ins_encode %{
7610 __ leaq($dst$$Register, $mem$$Address);
7611 %}
7612 ins_pipe(ialu_reg_reg_fat);
7613 %}
7614
7615 instruct loadConI(rRegI dst, immI src)
7616 %{
7617 match(Set dst src);
7618
7619 format %{ "movl $dst, $src\t# int" %}
7620 ins_encode %{
7621 __ movl($dst$$Register, $src$$constant);
7622 %}
7623 ins_pipe(ialu_reg_fat); // XXX
7624 %}
7625
7626 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7627 %{
7628 match(Set dst src);
7629 effect(KILL cr);
7630
7631 ins_cost(50);
7632 format %{ "xorl $dst, $dst\t# int" %}
7633 ins_encode %{
7634 __ xorl($dst$$Register, $dst$$Register);
7635 %}
7636 ins_pipe(ialu_reg);
7637 %}
7638
7639 instruct loadConL(rRegL dst, immL src)
7640 %{
7641 match(Set dst src);
7642
7643 ins_cost(150);
7644 format %{ "movq $dst, $src\t# long" %}
7645 ins_encode %{
7646 __ mov64($dst$$Register, $src$$constant);
7647 %}
7648 ins_pipe(ialu_reg);
7649 %}
7650
7651 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7652 %{
7653 match(Set dst src);
7654 effect(KILL cr);
7655
7656 ins_cost(50);
7657 format %{ "xorl $dst, $dst\t# long" %}
7658 ins_encode %{
7659 __ xorl($dst$$Register, $dst$$Register);
7660 %}
7661 ins_pipe(ialu_reg); // XXX
7662 %}
7663
7664 instruct loadConUL32(rRegL dst, immUL32 src)
7665 %{
7666 match(Set dst src);
7667
7668 ins_cost(60);
7669 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7670 ins_encode %{
7671 __ movl($dst$$Register, $src$$constant);
7672 %}
7673 ins_pipe(ialu_reg);
7674 %}
7675
7676 instruct loadConL32(rRegL dst, immL32 src)
7677 %{
7678 match(Set dst src);
7679
7680 ins_cost(70);
7681 format %{ "movq $dst, $src\t# long (32-bit)" %}
7682 ins_encode %{
7683 __ movq($dst$$Register, $src$$constant);
7684 %}
7685 ins_pipe(ialu_reg);
7686 %}
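
// The three non-zero long-constant rules form a cost ladder: constants that
// fit in unsigned 32 bits take movl (cost 60), sign-extended 32-bit
// immediates take movq (70), and only the general case pays for the full
// mov64 (150). E.g. 0xFFFFFFFFL matches loadConUL32 while -1L matches
// loadConL32.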
7687
7688 instruct loadConP(rRegP dst, immP con) %{
7689 match(Set dst con);
7690
7691 format %{ "movq $dst, $con\t# ptr" %}
7692 ins_encode %{
7693 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7694 %}
7695 ins_pipe(ialu_reg_fat); // XXX
7696 %}
7697
7698 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7699 %{
7700 match(Set dst src);
7701 effect(KILL cr);
7702
7703 ins_cost(50);
7704 format %{ "xorl $dst, $dst\t# ptr" %}
7705 ins_encode %{
7706 __ xorl($dst$$Register, $dst$$Register);
7707 %}
7708 ins_pipe(ialu_reg);
7709 %}
7710
7711 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7712 %{
7713 match(Set dst src);
7714 effect(KILL cr);
7715
7716 ins_cost(60);
7717 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7718 ins_encode %{
7719 __ movl($dst$$Register, $src$$constant);
7720 %}
7721 ins_pipe(ialu_reg);
7722 %}
7723
7724 instruct loadConF(regF dst, immF con) %{
7725 match(Set dst con);
7726 ins_cost(125);
7727 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7728 ins_encode %{
7729 __ movflt($dst$$XMMRegister, $constantaddress($con));
7730 %}
7731 ins_pipe(pipe_slow);
7732 %}
7733
7734 instruct loadConH(regF dst, immH con) %{
7735 match(Set dst con);
7736 ins_cost(125);
7737 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7738 ins_encode %{
7739 __ movflt($dst$$XMMRegister, $constantaddress($con));
7740 %}
7741 ins_pipe(pipe_slow);
7742 %}
7743
7744 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7745 match(Set dst src);
7746 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7748 ins_encode %{
7749 __ xorq($dst$$Register, $dst$$Register);
7750 %}
7751 ins_pipe(ialu_reg);
7752 %}
7753
7754 instruct loadConN(rRegN dst, immN src) %{
7755 match(Set dst src);
7756
7757 ins_cost(125);
7758 format %{ "movl $dst, $src\t# compressed ptr" %}
7759 ins_encode %{
7760 address con = (address)$src$$constant;
7761 if (con == nullptr) {
7762 ShouldNotReachHere();
7763 } else {
7764 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7765 }
7766 %}
7767 ins_pipe(ialu_reg_fat); // XXX
7768 %}
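
// A null narrow oop cannot reach this rule: the zero constant matches the
// cheaper loadConN0 above, which is what the ShouldNotReachHere() guard
// documents.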
7769
7770 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7771 match(Set dst src);
7772
7773 ins_cost(125);
7774 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7775 ins_encode %{
7776 address con = (address)$src$$constant;
7777 if (con == nullptr) {
7778 ShouldNotReachHere();
7779 } else {
7780 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7781 }
7782 %}
7783 ins_pipe(ialu_reg_fat); // XXX
7784 %}
7785
7786 instruct loadConF0(regF dst, immF0 src)
7787 %{
7788 match(Set dst src);
7789 ins_cost(100);
7790
7791 format %{ "xorps $dst, $dst\t# float 0.0" %}
7792 ins_encode %{
7793 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7794 %}
7795 ins_pipe(pipe_slow);
7796 %}
7797
// Use the same format since predicate() cannot be used here.
7799 instruct loadConD(regD dst, immD con) %{
7800 match(Set dst con);
7801 ins_cost(125);
7802 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7803 ins_encode %{
7804 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7805 %}
7806 ins_pipe(pipe_slow);
7807 %}
7808
7809 instruct loadConD0(regD dst, immD0 src)
7810 %{
7811 match(Set dst src);
7812 ins_cost(100);
7813
7814 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7815 ins_encode %{
7816 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7817 %}
7818 ins_pipe(pipe_slow);
7819 %}
7820
7821 instruct loadSSI(rRegI dst, stackSlotI src)
7822 %{
7823 match(Set dst src);
7824
7825 ins_cost(125);
7826 format %{ "movl $dst, $src\t# int stk" %}
7827 ins_encode %{
7828 __ movl($dst$$Register, $src$$Address);
7829 %}
7830 ins_pipe(ialu_reg_mem);
7831 %}
7832
7833 instruct loadSSL(rRegL dst, stackSlotL src)
7834 %{
7835 match(Set dst src);
7836
7837 ins_cost(125);
7838 format %{ "movq $dst, $src\t# long stk" %}
7839 ins_encode %{
7840 __ movq($dst$$Register, $src$$Address);
7841 %}
7842 ins_pipe(ialu_reg_mem);
7843 %}
7844
7845 instruct loadSSP(rRegP dst, stackSlotP src)
7846 %{
7847 match(Set dst src);
7848
7849 ins_cost(125);
7850 format %{ "movq $dst, $src\t# ptr stk" %}
7851 ins_encode %{
7852 __ movq($dst$$Register, $src$$Address);
7853 %}
7854 ins_pipe(ialu_reg_mem);
7855 %}
7856
7857 instruct loadSSF(regF dst, stackSlotF src)
7858 %{
7859 match(Set dst src);
7860
7861 ins_cost(125);
7862 format %{ "movss $dst, $src\t# float stk" %}
7863 ins_encode %{
7864 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7865 %}
7866 ins_pipe(pipe_slow); // XXX
7867 %}
7868
// Use the same format since predicate() cannot be used here.
7870 instruct loadSSD(regD dst, stackSlotD src)
7871 %{
7872 match(Set dst src);
7873
7874 ins_cost(125);
7875 format %{ "movsd $dst, $src\t# double stk" %}
7876 ins_encode %{
7877 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7878 %}
7879 ins_pipe(pipe_slow); // XXX
7880 %}
7881
7882 // Prefetch instructions for allocation.
7883 // Must be safe to execute with invalid address (cannot fault).
7884
7885 instruct prefetchAlloc( memory mem ) %{
7886 predicate(AllocatePrefetchInstr==3);
7887 match(PrefetchAllocation mem);
7888 ins_cost(125);
7889
7890 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7891 ins_encode %{
7892 __ prefetchw($mem$$Address);
7893 %}
7894 ins_pipe(ialu_mem);
7895 %}
7896
7897 instruct prefetchAllocNTA( memory mem ) %{
7898 predicate(AllocatePrefetchInstr==0);
7899 match(PrefetchAllocation mem);
7900 ins_cost(125);
7901
7902 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7903 ins_encode %{
7904 __ prefetchnta($mem$$Address);
7905 %}
7906 ins_pipe(ialu_mem);
7907 %}
7908
7909 instruct prefetchAllocT0( memory mem ) %{
7910 predicate(AllocatePrefetchInstr==1);
7911 match(PrefetchAllocation mem);
7912 ins_cost(125);
7913
7914 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7915 ins_encode %{
7916 __ prefetcht0($mem$$Address);
7917 %}
7918 ins_pipe(ialu_mem);
7919 %}
7920
7921 instruct prefetchAllocT2( memory mem ) %{
7922 predicate(AllocatePrefetchInstr==2);
7923 match(PrefetchAllocation mem);
7924 ins_cost(125);
7925
7926 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7927 ins_encode %{
7928 __ prefetcht2($mem$$Address);
7929 %}
7930 ins_pipe(ialu_mem);
7931 %}
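
// Summary (from the predicates above): -XX:AllocatePrefetchInstr selects
// the flavor — 0 => prefetchnta, 1 => prefetcht0, 2 => prefetcht2,
// 3 => prefetchw. Each variant must tolerate an invalid address, since the
// prefetched allocation address may lie beyond mapped memory.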
7932
7933 //----------Store Instructions-------------------------------------------------
7934
7935 // Store Byte
7936 instruct storeB(memory mem, rRegI src)
7937 %{
7938 match(Set mem (StoreB mem src));
7939
7940 ins_cost(125); // XXX
7941 format %{ "movb $mem, $src\t# byte" %}
7942 ins_encode %{
7943 __ movb($mem$$Address, $src$$Register);
7944 %}
7945 ins_pipe(ialu_mem_reg);
7946 %}
7947
7948 // Store Char/Short
7949 instruct storeC(memory mem, rRegI src)
7950 %{
7951 match(Set mem (StoreC mem src));
7952
7953 ins_cost(125); // XXX
7954 format %{ "movw $mem, $src\t# char/short" %}
7955 ins_encode %{
7956 __ movw($mem$$Address, $src$$Register);
7957 %}
7958 ins_pipe(ialu_mem_reg);
7959 %}
7960
7961 // Store Integer
7962 instruct storeI(memory mem, rRegI src)
7963 %{
7964 match(Set mem (StoreI mem src));
7965
7966 ins_cost(125); // XXX
7967 format %{ "movl $mem, $src\t# int" %}
7968 ins_encode %{
7969 __ movl($mem$$Address, $src$$Register);
7970 %}
7971 ins_pipe(ialu_mem_reg);
7972 %}
7973
7974 // Store Long
7975 instruct storeL(memory mem, rRegL src)
7976 %{
7977 match(Set mem (StoreL mem src));
7978
7979 ins_cost(125); // XXX
7980 format %{ "movq $mem, $src\t# long" %}
7981 ins_encode %{
7982 __ movq($mem$$Address, $src$$Register);
7983 %}
7984 ins_pipe(ialu_mem_reg); // XXX
7985 %}
7986
7987 // Store Pointer
7988 instruct storeP(memory mem, any_RegP src)
7989 %{
7990 predicate(n->as_Store()->barrier_data() == 0);
7991 match(Set mem (StoreP mem src));
7992
7993 ins_cost(125); // XXX
7994 format %{ "movq $mem, $src\t# ptr" %}
7995 ins_encode %{
7996 __ movq($mem$$Address, $src$$Register);
7997 %}
7998 ins_pipe(ialu_mem_reg);
7999 %}
8000
8001 instruct storeImmP0(memory mem, immP0 zero)
8002 %{
8003 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8004 match(Set mem (StoreP mem zero));
8005
8006 ins_cost(125); // XXX
8007 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8008 ins_encode %{
8009 __ movq($mem$$Address, r12);
8010 %}
8011 ins_pipe(ialu_mem_reg);
8012 %}
8013
8014 // Store Null Pointer, mark word, or other simple pointer constant.
8015 instruct storeImmP(memory mem, immP31 src)
8016 %{
8017 predicate(n->as_Store()->barrier_data() == 0);
8018 match(Set mem (StoreP mem src));
8019
8020 ins_cost(150); // XXX
8021 format %{ "movq $mem, $src\t# ptr" %}
8022 ins_encode %{
8023 __ movq($mem$$Address, $src$$constant);
8024 %}
8025 ins_pipe(ialu_mem_imm);
8026 %}
8027
8028 // Store Compressed Pointer
8029 instruct storeN(memory mem, rRegN src)
8030 %{
8031 predicate(n->as_Store()->barrier_data() == 0);
8032 match(Set mem (StoreN mem src));
8033
8034 ins_cost(125); // XXX
8035 format %{ "movl $mem, $src\t# compressed ptr" %}
8036 ins_encode %{
8037 __ movl($mem$$Address, $src$$Register);
8038 %}
8039 ins_pipe(ialu_mem_reg);
8040 %}
8041
8042 instruct storeNKlass(memory mem, rRegN src)
8043 %{
8044 match(Set mem (StoreNKlass mem src));
8045
8046 ins_cost(125); // XXX
8047 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8048 ins_encode %{
8049 __ movl($mem$$Address, $src$$Register);
8050 %}
8051 ins_pipe(ialu_mem_reg);
8052 %}
8053
8054 instruct storeImmN0(memory mem, immN0 zero)
8055 %{
8056 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8057 match(Set mem (StoreN mem zero));
8058
8059 ins_cost(125); // XXX
8060 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8061 ins_encode %{
8062 __ movl($mem$$Address, r12);
8063 %}
8064 ins_pipe(ialu_mem_reg);
8065 %}
8066
8067 instruct storeImmN(memory mem, immN src)
8068 %{
8069 predicate(n->as_Store()->barrier_data() == 0);
8070 match(Set mem (StoreN mem src));
8071
8072 ins_cost(150); // XXX
8073 format %{ "movl $mem, $src\t# compressed ptr" %}
8074 ins_encode %{
8075 address con = (address)$src$$constant;
8076 if (con == nullptr) {
8077 __ movl($mem$$Address, 0);
8078 } else {
8079 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8080 }
8081 %}
8082 ins_pipe(ialu_mem_imm);
8083 %}
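
// Unlike loadConN, a null narrow oop can reach this rule: the register-free
// storeImmN0 variant above only applies when the heap base is zero, so the
// nullptr branch stores a literal 0.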
8084
8085 instruct storeImmNKlass(memory mem, immNKlass src)
8086 %{
8087 match(Set mem (StoreNKlass mem src));
8088
8089 ins_cost(150); // XXX
8090 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8091 ins_encode %{
8092 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8093 %}
8094 ins_pipe(ialu_mem_imm);
8095 %}
8096
8097 // Store Integer Immediate
8098 instruct storeImmI0(memory mem, immI_0 zero)
8099 %{
8100 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8101 match(Set mem (StoreI mem zero));
8102
8103 ins_cost(125); // XXX
8104 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8105 ins_encode %{
8106 __ movl($mem$$Address, r12);
8107 %}
8108 ins_pipe(ialu_mem_reg);
8109 %}
8110
8111 instruct storeImmI(memory mem, immI src)
8112 %{
8113 match(Set mem (StoreI mem src));
8114
8115 ins_cost(150);
8116 format %{ "movl $mem, $src\t# int" %}
8117 ins_encode %{
8118 __ movl($mem$$Address, $src$$constant);
8119 %}
8120 ins_pipe(ialu_mem_imm);
8121 %}
8122
8123 // Store Long Immediate
8124 instruct storeImmL0(memory mem, immL0 zero)
8125 %{
8126 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8127 match(Set mem (StoreL mem zero));
8128
8129 ins_cost(125); // XXX
8130 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8131 ins_encode %{
8132 __ movq($mem$$Address, r12);
8133 %}
8134 ins_pipe(ialu_mem_reg);
8135 %}
8136
8137 instruct storeImmL(memory mem, immL32 src)
8138 %{
8139 match(Set mem (StoreL mem src));
8140
8141 ins_cost(150);
8142 format %{ "movq $mem, $src\t# long" %}
8143 ins_encode %{
8144 __ movq($mem$$Address, $src$$constant);
8145 %}
8146 ins_pipe(ialu_mem_imm);
8147 %}
8148
8149 // Store Short/Char Immediate
8150 instruct storeImmC0(memory mem, immI_0 zero)
8151 %{
8152 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8153 match(Set mem (StoreC mem zero));
8154
8155 ins_cost(125); // XXX
8156 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8157 ins_encode %{
8158 __ movw($mem$$Address, r12);
8159 %}
8160 ins_pipe(ialu_mem_reg);
8161 %}
8162
8163 instruct storeImmI16(memory mem, immI16 src)
8164 %{
8165 predicate(UseStoreImmI16);
8166 match(Set mem (StoreC mem src));
8167
8168 ins_cost(150);
8169 format %{ "movw $mem, $src\t# short/char" %}
8170 ins_encode %{
8171 __ movw($mem$$Address, $src$$constant);
8172 %}
8173 ins_pipe(ialu_mem_imm);
8174 %}
8175
8176 // Store Byte Immediate
8177 instruct storeImmB0(memory mem, immI_0 zero)
8178 %{
8179 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8180 match(Set mem (StoreB mem zero));
8181
8182 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8184 ins_encode %{
8185 __ movb($mem$$Address, r12);
8186 %}
8187 ins_pipe(ialu_mem_reg);
8188 %}
8189
8190 instruct storeImmB(memory mem, immI8 src)
8191 %{
8192 match(Set mem (StoreB mem src));
8193
8194 ins_cost(150); // XXX
8195 format %{ "movb $mem, $src\t# byte" %}
8196 ins_encode %{
8197 __ movb($mem$$Address, $src$$constant);
8198 %}
8199 ins_pipe(ialu_mem_imm);
8200 %}
8201
8202 // Store Float
8203 instruct storeF(memory mem, regF src)
8204 %{
8205 match(Set mem (StoreF mem src));
8206
8207 ins_cost(95); // XXX
8208 format %{ "movss $mem, $src\t# float" %}
8209 ins_encode %{
8210 __ movflt($mem$$Address, $src$$XMMRegister);
8211 %}
8212 ins_pipe(pipe_slow); // XXX
8213 %}
8214
8215 // Store immediate Float value (it is faster than store from XMM register)
8216 instruct storeF0(memory mem, immF0 zero)
8217 %{
8218 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8219 match(Set mem (StoreF mem zero));
8220
8221 ins_cost(25); // XXX
8222 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8223 ins_encode %{
8224 __ movl($mem$$Address, r12);
8225 %}
8226 ins_pipe(ialu_mem_reg);
8227 %}
8228
8229 instruct storeF_imm(memory mem, immF src)
8230 %{
8231 match(Set mem (StoreF mem src));
8232
8233 ins_cost(50);
8234 format %{ "movl $mem, $src\t# float" %}
8235 ins_encode %{
8236 __ movl($mem$$Address, jint_cast($src$$constant));
8237 %}
8238 ins_pipe(ialu_mem_imm);
8239 %}
8240
8241 // Store Double
8242 instruct storeD(memory mem, regD src)
8243 %{
8244 match(Set mem (StoreD mem src));
8245
8246 ins_cost(95); // XXX
8247 format %{ "movsd $mem, $src\t# double" %}
8248 ins_encode %{
8249 __ movdbl($mem$$Address, $src$$XMMRegister);
8250 %}
8251 ins_pipe(pipe_slow); // XXX
8252 %}
8253
8254 // Store immediate double 0.0 (it is faster than store from XMM register)
8255 instruct storeD0_imm(memory mem, immD0 src)
8256 %{
8257 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8258 match(Set mem (StoreD mem src));
8259
8260 ins_cost(50);
8261 format %{ "movq $mem, $src\t# double 0." %}
8262 ins_encode %{
8263 __ movq($mem$$Address, $src$$constant);
8264 %}
8265 ins_pipe(ialu_mem_imm);
8266 %}
8267
8268 instruct storeD0(memory mem, immD0 zero)
8269 %{
8270 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8271 match(Set mem (StoreD mem zero));
8272
8273 ins_cost(25); // XXX
8274 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8275 ins_encode %{
8276 __ movq($mem$$Address, r12);
8277 %}
8278 ins_pipe(ialu_mem_reg);
8279 %}
8280
8281 instruct storeSSI(stackSlotI dst, rRegI src)
8282 %{
8283 match(Set dst src);
8284
8285 ins_cost(100);
8286 format %{ "movl $dst, $src\t# int stk" %}
8287 ins_encode %{
8288 __ movl($dst$$Address, $src$$Register);
8289 %}
8290 ins_pipe( ialu_mem_reg );
8291 %}
8292
8293 instruct storeSSL(stackSlotL dst, rRegL src)
8294 %{
8295 match(Set dst src);
8296
8297 ins_cost(100);
8298 format %{ "movq $dst, $src\t# long stk" %}
8299 ins_encode %{
8300 __ movq($dst$$Address, $src$$Register);
8301 %}
8302 ins_pipe(ialu_mem_reg);
8303 %}
8304
8305 instruct storeSSP(stackSlotP dst, rRegP src)
8306 %{
8307 match(Set dst src);
8308
8309 ins_cost(100);
8310 format %{ "movq $dst, $src\t# ptr stk" %}
8311 ins_encode %{
8312 __ movq($dst$$Address, $src$$Register);
8313 %}
8314 ins_pipe(ialu_mem_reg);
8315 %}
8316
8317 instruct storeSSF(stackSlotF dst, regF src)
8318 %{
8319 match(Set dst src);
8320
8321 ins_cost(95); // XXX
8322 format %{ "movss $dst, $src\t# float stk" %}
8323 ins_encode %{
8324 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8325 %}
8326 ins_pipe(pipe_slow); // XXX
8327 %}
8328
8329 instruct storeSSD(stackSlotD dst, regD src)
8330 %{
8331 match(Set dst src);
8332
8333 ins_cost(95); // XXX
8334 format %{ "movsd $dst, $src\t# double stk" %}
8335 ins_encode %{
8336 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8337 %}
8338 ins_pipe(pipe_slow); // XXX
8339 %}
8340
8341 instruct cacheWB(indirect addr)
8342 %{
8343 predicate(VM_Version::supports_data_cache_line_flush());
8344 match(CacheWB addr);
8345
8346 ins_cost(100);
  format %{ "cache wb $addr" %}
8348 ins_encode %{
8349 assert($addr->index_position() < 0, "should be");
8350 assert($addr$$disp == 0, "should be");
8351 __ cache_wb(Address($addr$$base$$Register, 0));
8352 %}
8353 ins_pipe(pipe_slow); // XXX
8354 %}
8355
8356 instruct cacheWBPreSync()
8357 %{
8358 predicate(VM_Version::supports_data_cache_line_flush());
8359 match(CacheWBPreSync);
8360
8361 ins_cost(100);
  format %{ "cache wb presync" %}
8363 ins_encode %{
8364 __ cache_wbsync(true);
8365 %}
8366 ins_pipe(pipe_slow); // XXX
8367 %}
8368
8369 instruct cacheWBPostSync()
8370 %{
8371 predicate(VM_Version::supports_data_cache_line_flush());
8372 match(CacheWBPostSync);
8373
8374 ins_cost(100);
  format %{ "cache wb postsync" %}
8376 ins_encode %{
8377 __ cache_wbsync(false);
8378 %}
8379 ins_pipe(pipe_slow); // XXX
8380 %}
8381
8382 //----------BSWAP Instructions-------------------------------------------------
8383 instruct bytes_reverse_int(rRegI dst) %{
8384 match(Set dst (ReverseBytesI dst));
8385
8386 format %{ "bswapl $dst" %}
8387 ins_encode %{
8388 __ bswapl($dst$$Register);
8389 %}
8390 ins_pipe( ialu_reg );
8391 %}
8392
8393 instruct bytes_reverse_long(rRegL dst) %{
8394 match(Set dst (ReverseBytesL dst));
8395
8396 format %{ "bswapq $dst" %}
8397 ins_encode %{
8398 __ bswapq($dst$$Register);
8399 %}
8400 ins_pipe( ialu_reg);
8401 %}
8402
8403 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8404 match(Set dst (ReverseBytesUS dst));
8405 effect(KILL cr);
8406
  format %{ "bswapl $dst\n\t"
            "shrl $dst,16" %}
8409 ins_encode %{
8410 __ bswapl($dst$$Register);
8411 __ shrl($dst$$Register, 16);
8412 %}
8413 ins_pipe( ialu_reg );
8414 %}
8415
8416 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8417 match(Set dst (ReverseBytesS dst));
8418 effect(KILL cr);
8419
  format %{ "bswapl $dst\n\t"
            "sarl $dst,16" %}
8422 ins_encode %{
8423 __ bswapl($dst$$Register);
8424 __ sarl($dst$$Register, 16);
8425 %}
8426 ins_pipe( ialu_reg );
8427 %}
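
// Worked example (illustrative) for the two short reversals above, with the
// 16-bit value 0xABCD held in $dst as 0x0000ABCD:
//   bswapl: 0x0000ABCD -> 0xCDAB0000
//   shrl 16 (unsigned variant) -> 0x0000CDAB
//   sarl 16 (signed variant)   -> 0xFFFFCDAB (top bit of 0xCDAB propagates)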
8428
8429 //---------- Zeros Count Instructions ------------------------------------------
8430
8431 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8432 predicate(UseCountLeadingZerosInstruction);
8433 match(Set dst (CountLeadingZerosI src));
8434 effect(KILL cr);
8435
8436 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8437 ins_encode %{
8438 __ lzcntl($dst$$Register, $src$$Register);
8439 %}
8440 ins_pipe(ialu_reg);
8441 %}
8442
8443 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8444 predicate(UseCountLeadingZerosInstruction);
8445 match(Set dst (CountLeadingZerosI (LoadI src)));
8446 effect(KILL cr);
8447 ins_cost(175);
8448 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8449 ins_encode %{
8450 __ lzcntl($dst$$Register, $src$$Address);
8451 %}
8452 ins_pipe(ialu_reg_mem);
8453 %}
8454
8455 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8456 predicate(!UseCountLeadingZerosInstruction);
8457 match(Set dst (CountLeadingZerosI src));
8458 effect(KILL cr);
8459
8460 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8461 "jnz skip\n\t"
8462 "movl $dst, -1\n"
8463 "skip:\n\t"
8464 "negl $dst\n\t"
8465 "addl $dst, 31" %}
8466 ins_encode %{
8467 Register Rdst = $dst$$Register;
8468 Register Rsrc = $src$$Register;
8469 Label skip;
8470 __ bsrl(Rdst, Rsrc);
8471 __ jccb(Assembler::notZero, skip);
8472 __ movl(Rdst, -1);
8473 __ bind(skip);
8474 __ negl(Rdst);
8475 __ addl(Rdst, BitsPerInt - 1);
8476 %}
8477 ins_pipe(ialu_reg);
8478 %}
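
// The fallback computes 31 - bsr(src) as neg + add. Worked example
// (illustrative): src = 0x00010000 => bsrl yields 16, negl gives -16,
// addl 31 gives 15. For src == 0, bsrl sets ZF, $dst is forced to -1, and
// the same arithmetic produces the defined answer 32.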
8479
8480 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8481 predicate(UseCountLeadingZerosInstruction);
8482 match(Set dst (CountLeadingZerosL src));
8483 effect(KILL cr);
8484
8485 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8486 ins_encode %{
8487 __ lzcntq($dst$$Register, $src$$Register);
8488 %}
8489 ins_pipe(ialu_reg);
8490 %}
8491
8492 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8493 predicate(UseCountLeadingZerosInstruction);
8494 match(Set dst (CountLeadingZerosL (LoadL src)));
8495 effect(KILL cr);
8496 ins_cost(175);
8497 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8498 ins_encode %{
8499 __ lzcntq($dst$$Register, $src$$Address);
8500 %}
8501 ins_pipe(ialu_reg_mem);
8502 %}
8503
8504 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8505 predicate(!UseCountLeadingZerosInstruction);
8506 match(Set dst (CountLeadingZerosL src));
8507 effect(KILL cr);
8508
8509 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8510 "jnz skip\n\t"
8511 "movl $dst, -1\n"
8512 "skip:\n\t"
8513 "negl $dst\n\t"
8514 "addl $dst, 63" %}
8515 ins_encode %{
8516 Register Rdst = $dst$$Register;
8517 Register Rsrc = $src$$Register;
8518 Label skip;
8519 __ bsrq(Rdst, Rsrc);
8520 __ jccb(Assembler::notZero, skip);
8521 __ movl(Rdst, -1);
8522 __ bind(skip);
8523 __ negl(Rdst);
8524 __ addl(Rdst, BitsPerLong - 1);
8525 %}
8526 ins_pipe(ialu_reg);
8527 %}
8528
8529 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8530 predicate(UseCountTrailingZerosInstruction);
8531 match(Set dst (CountTrailingZerosI src));
8532 effect(KILL cr);
8533
8534 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8535 ins_encode %{
8536 __ tzcntl($dst$$Register, $src$$Register);
8537 %}
8538 ins_pipe(ialu_reg);
8539 %}
8540
8541 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8542 predicate(UseCountTrailingZerosInstruction);
8543 match(Set dst (CountTrailingZerosI (LoadI src)));
8544 effect(KILL cr);
8545 ins_cost(175);
8546 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8547 ins_encode %{
8548 __ tzcntl($dst$$Register, $src$$Address);
8549 %}
8550 ins_pipe(ialu_reg_mem);
8551 %}
8552
8553 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8554 predicate(!UseCountTrailingZerosInstruction);
8555 match(Set dst (CountTrailingZerosI src));
8556 effect(KILL cr);
8557
8558 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8559 "jnz done\n\t"
8560 "movl $dst, 32\n"
8561 "done:" %}
8562 ins_encode %{
8563 Register Rdst = $dst$$Register;
8564 Label done;
8565 __ bsfl(Rdst, $src$$Register);
8566 __ jccb(Assembler::notZero, done);
8567 __ movl(Rdst, BitsPerInt);
8568 __ bind(done);
8569 %}
8570 ins_pipe(ialu_reg);
8571 %}
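
// bsf leaves the destination undefined for a zero source, hence the branch
// that substitutes 32 (64 in the long variant below). Worked example
// (illustrative): src = 8 => bsfl yields 3, matching
// Integer.numberOfTrailingZeros(8).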
8572
8573 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8574 predicate(UseCountTrailingZerosInstruction);
8575 match(Set dst (CountTrailingZerosL src));
8576 effect(KILL cr);
8577
8578 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8579 ins_encode %{
8580 __ tzcntq($dst$$Register, $src$$Register);
8581 %}
8582 ins_pipe(ialu_reg);
8583 %}
8584
8585 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8586 predicate(UseCountTrailingZerosInstruction);
8587 match(Set dst (CountTrailingZerosL (LoadL src)));
8588 effect(KILL cr);
8589 ins_cost(175);
8590 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8591 ins_encode %{
8592 __ tzcntq($dst$$Register, $src$$Address);
8593 %}
8594 ins_pipe(ialu_reg_mem);
8595 %}
8596
8597 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8598 predicate(!UseCountTrailingZerosInstruction);
8599 match(Set dst (CountTrailingZerosL src));
8600 effect(KILL cr);
8601
8602 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8603 "jnz done\n\t"
8604 "movl $dst, 64\n"
8605 "done:" %}
8606 ins_encode %{
8607 Register Rdst = $dst$$Register;
8608 Label done;
8609 __ bsfq(Rdst, $src$$Register);
8610 __ jccb(Assembler::notZero, done);
8611 __ movl(Rdst, BitsPerLong);
8612 __ bind(done);
8613 %}
8614 ins_pipe(ialu_reg);
8615 %}
8616
8617 //--------------- Reverse Operation Instructions ----------------
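// These match ReverseI/ReverseL (Integer.reverse/Long.reverse), which reverse
// the bit order of a value, not the byte order. When GFNI is available, the
// reverseI/reverseL macro-assembler routines can reverse the bits of each
// byte with a galois-field affine transform (hence the XMM temps) combined
// with a byte swap; without GFNI they fall back to a scalar shift-and-mask
// sequence using the GP temps.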
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst, $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst, $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst, $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst, $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}

//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
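// The count is at most 64, so a 32-bit destination register (rRegI) is
// sufficient even when popcntq reads a 64-bit source.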
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}


//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

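// x86 is TSO (total store order): loads are not reordered with older loads,
// stores are not reordered with older stores, and only StoreLoad reordering
// is possible. Acquire, release and storestore barriers therefore encode to
// nothing (size(0)) and exist only to constrain compiler scheduling; only
// MemBarVolatile must emit a serializing instruction, a locked add to the
// top of the stack, which is cheaper than mfence on most CPUs.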
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long -> ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vectors alignment masking,
// in the case of 32-bit oops (heap < 4GB).
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop pointer into compressed form
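// With compressed oops, a 64-bit heap pointer is stored as a 32-bit value:
// narrow = (oop - heap_base) >> shift, where shift is typically 3 (8-byte
// object alignment) and heap_base may be zero for small heaps.
// encode_heap_oop also preserves null; the _not_null variants below skip
// that check when the type system proves the input cannot be null.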
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

//----------Conditional Move---------------------------------------------------
// Jump
// dummy instruction for generating temp registers
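// These implement the Jump node used for tableswitch dispatch: load the
// address of the jump table (materialized in the constant section) into
// $dest, then jump through [table + switch_val << shift (+ offset)]. $dest
// is only a temporary for the table address.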
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Conditional move
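// When the two CMove inputs are the constants 1 and 0 (the predicate checks
// that the true-path input, the second operand of Binary, is 0), the result
// is (cond ? 0 : 1), i.e. the negated condition materialized as a byte, so a
// single setb with the inverted condition replaces a constant load plus cmov.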
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

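// After a ucomiss/ucomisd compare involving a NaN (an unordered result),
// ZF, PF and CF are all set, which a single cmov condition cannot
// distinguish from equality. For ne the move must also happen in the
// unordered case, so the cmov is split into cmovp (parity = unordered)
// followed by cmovne; the eq variants below swap the CMove inputs and reuse
// the same instruction pair.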
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpl $dst, $src1, $src2\n\t"
            "cmovnel $dst, $src2" %}
  ins_encode %{
    __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// This special handling is needed only for eq/neq comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpl $dst, $src1, $src2\n\t"
            "cmovnel $dst, $src2" %}
  ins_encode %{
    __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move ndd
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

// Conditional move ndd
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
  predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
  effect(TEMP dst);

  ins_cost(200);
  format %{ "ecmovpq $dst, $src1, $src2\n\t"
            "cmovneq $dst, $src2" %}
  ins_encode %{
    __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

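// There is no cmov for XMM registers, so a conditional float/double move is
// a short branch around a register move: invert the condition and skip the
// movss/movsd when the original condition does not hold.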
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

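// With UseAPX, the Intel APX "new data destination" (NDD) forms are used:
// eaddl dst, src1, src2 writes its result to a third register instead of
// destroying src1, saving the extra mov the two-operand form often needs.
// The trailing boolean of the eaddl/eaddq calls selects the APX no-flags
// (NF) encoding; it is false here because these patterns publish their flag
// effects via flag(...) and KILL cr.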
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

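// inc/dec are shorter than add/sub with an immediate but, unlike add/sub,
// leave CF untouched, which is why these patterns declare no flag(...sets...)
// attributes. They are gated on UseIncDec since the partial flag update is
// slow on some CPUs.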
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

// XXX why does that use AddI
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddI
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

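// Shift-add and add-add combinations can fold into a single lea, which
// computes base + index*scale + disp in the address-generation unit without
// touching flags (hence no KILL cr). The patterns are gated on
// supports_fast_2op_lea()/supports_fast_3op_lea() because a multi-component
// lea is slow on some CPUs.
// Example: AddI (LShiftI index 2) disp ==> leal dst, [index << 2 + disp].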
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10264 %{
10265 predicate(UseAPX && UseIncDec);
10266 match(Set dst (AddL src val));
10267 effect(KILL cr);
10268
10269 format %{ "eincq $dst, $src\t# long ndd" %}
10270 ins_encode %{
10271 __ eincq($dst$$Register, $src$$Register, false);
10272 %}
10273 ins_pipe(ialu_reg);
10274 %}
10275
10276 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10277 %{
10278 predicate(UseAPX && UseIncDec);
10279 match(Set dst (AddL (LoadL src) val));
10280 effect(KILL cr);
10281
10282 format %{ "eincq $dst, $src\t# long ndd" %}
10283 ins_encode %{
10284 __ eincq($dst$$Register, $src$$Address, false);
10285 %}
10286 ins_pipe(ialu_reg);
10287 %}
10288
10289 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10290 %{
10291 predicate(UseIncDec);
10292 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10293 effect(KILL cr);
10294
10295 ins_cost(125); // XXX
10296 format %{ "incq $dst\t# long" %}
10297 ins_encode %{
10298 __ incrementq($dst$$Address);
10299 %}
10300 ins_pipe(ialu_mem_imm);
10301 %}
10302
10303 // XXX why does that use AddL
10304 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10305 %{
10306 predicate(!UseAPX && UseIncDec);
10307 match(Set dst (AddL dst src));
10308 effect(KILL cr);
10309
10310 format %{ "decq $dst\t# long" %}
10311 ins_encode %{
10312 __ decrementq($dst$$Register);
10313 %}
10314 ins_pipe(ialu_reg);
10315 %}
10316
10317 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10318 %{
10319 predicate(UseAPX && UseIncDec);
10320 match(Set dst (AddL src val));
10321 effect(KILL cr);
10322
10323 format %{ "edecq $dst, $src\t# long ndd" %}
10324 ins_encode %{
10325 __ edecq($dst$$Register, $src$$Register, false);
10326 %}
10327 ins_pipe(ialu_reg);
10328 %}
10329
10330 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10331 %{
10332 predicate(UseAPX && UseIncDec);
10333 match(Set dst (AddL (LoadL src) val));
10334 effect(KILL cr);
10335
10336 format %{ "edecq $dst, $src\t# long ndd" %}
10337 ins_encode %{
10338 __ edecq($dst$$Register, $src$$Address, false);
10339 %}
10340 ins_pipe(ialu_reg);
10341 %}
10342
10343 // XXX why does that use AddL
10344 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10345 %{
10346 predicate(UseIncDec);
10347 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10348 effect(KILL cr);
10349
10350 ins_cost(125); // XXX
10351 format %{ "decq $dst\t# long" %}
10352 ins_encode %{
10353 __ decrementq($dst$$Address);
10354 %}
10355 ins_pipe(ialu_mem_imm);
10356 %}
10357
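// The leaq rules below fold shift/add address arithmetic into a single LEA.
// They are gated on supports_fast_2op_lea()/supports_fast_3op_lea() since LEA
// latency depends on how many components (base, scaled index, displacement)
// the address uses on the target CPU.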
10358 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10359 %{
10360 predicate(VM_Version::supports_fast_2op_lea());
10361 match(Set dst (AddL (LShiftL index scale) disp));
10362
10363 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10364 ins_encode %{
10365 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10366 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10367 %}
10368 ins_pipe(ialu_reg_reg);
10369 %}
10370
10371 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10372 %{
10373 predicate(VM_Version::supports_fast_3op_lea());
10374 match(Set dst (AddL (AddL base index) disp));
10375
10376 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10377 ins_encode %{
10378 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10379 %}
10380 ins_pipe(ialu_reg_reg);
10381 %}
10382
10383 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10384 %{
10385 predicate(VM_Version::supports_fast_2op_lea());
10386 match(Set dst (AddL base (LShiftL index scale)));
10387
10388 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10389 ins_encode %{
10390 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10391 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10392 %}
10393 ins_pipe(ialu_reg_reg);
10394 %}
10395
10396 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10397 %{
10398 predicate(VM_Version::supports_fast_3op_lea());
10399 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10400
10401 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10402 ins_encode %{
10403 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10404 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10405 %}
10406 ins_pipe(ialu_reg_reg);
10407 %}
10408
10409 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10410 %{
10411 match(Set dst (AddP dst src));
10412 effect(KILL cr);
10413 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10414
10415 format %{ "addq $dst, $src\t# ptr" %}
10416 ins_encode %{
10417 __ addq($dst$$Register, $src$$Register);
10418 %}
10419 ins_pipe(ialu_reg_reg);
10420 %}
10421
10422 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10423 %{
10424 match(Set dst (AddP dst src));
10425 effect(KILL cr);
10426 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10427
10428 format %{ "addq $dst, $src\t# ptr" %}
10429 ins_encode %{
10430 __ addq($dst$$Register, $src$$constant);
10431 %}
10432 ins_pipe( ialu_reg );
10433 %}
10434
10435 // XXX addP mem ops ????
10436
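// The cast rules below are compile-time type assertions only: with size(0)
// and an empty encoding they emit no machine code. The checked variants are
// the exception; they verify the value against its declared range when
// VerifyConstraintCasts is enabled.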
10437 instruct checkCastPP(rRegP dst)
10438 %{
10439 match(Set dst (CheckCastPP dst));
10440
10441 size(0);
10442 format %{ "# checkcastPP of $dst" %}
10443 ins_encode(/* empty encoding */);
10444 ins_pipe(empty);
10445 %}
10446
10447 instruct castPP(rRegP dst)
10448 %{
10449 match(Set dst (CastPP dst));
10450
10451 size(0);
10452 format %{ "# castPP of $dst" %}
10453 ins_encode(/* empty encoding */);
10454 ins_pipe(empty);
10455 %}
10456
10457 instruct castII(rRegI dst)
10458 %{
10459 predicate(VerifyConstraintCasts == 0);
10460 match(Set dst (CastII dst));
10461
10462 size(0);
10463 format %{ "# castII of $dst" %}
10464 ins_encode(/* empty encoding */);
10465 ins_cost(0);
10466 ins_pipe(empty);
10467 %}
10468
10469 instruct castII_checked(rRegI dst, rFlagsReg cr)
10470 %{
10471 predicate(VerifyConstraintCasts > 0);
10472 match(Set dst (CastII dst));
10473
10474 effect(KILL cr);
10475 format %{ "# cast_checked_II $dst" %}
10476 ins_encode %{
10477 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10478 %}
10479 ins_pipe(pipe_slow);
10480 %}
10481
10482 instruct castLL(rRegL dst)
10483 %{
10484 predicate(VerifyConstraintCasts == 0);
10485 match(Set dst (CastLL dst));
10486
10487 size(0);
10488 format %{ "# castLL of $dst" %}
10489 ins_encode(/* empty encoding */);
10490 ins_cost(0);
10491 ins_pipe(empty);
10492 %}
10493
10494 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10495 %{
10496 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10497 match(Set dst (CastLL dst));
10498
10499 effect(KILL cr);
10500 format %{ "# cast_checked_LL $dst" %}
10501 ins_encode %{
10502 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10503 %}
10504 ins_pipe(pipe_slow);
10505 %}
10506
10507 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10508 %{
10509 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10510 match(Set dst (CastLL dst));
10511
10512 effect(KILL cr, TEMP tmp);
10513 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10514 ins_encode %{
10515 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10516 %}
10517 ins_pipe(pipe_slow);
10518 %}
10519
10520 instruct castFF(regF dst)
10521 %{
10522 match(Set dst (CastFF dst));
10523
10524 size(0);
10525 format %{ "# castFF of $dst" %}
10526 ins_encode(/* empty encoding */);
10527 ins_cost(0);
10528 ins_pipe(empty);
10529 %}
10530
10531 instruct castHH(regF dst)
10532 %{
10533 match(Set dst (CastHH dst));
10534
10535 size(0);
10536 format %{ "# castHH of $dst" %}
10537 ins_encode(/* empty encoding */);
10538 ins_cost(0);
10539 ins_pipe(empty);
10540 %}
10541
10542 instruct castDD(regD dst)
10543 %{
10544 match(Set dst (CastDD dst));
10545
10546 size(0);
10547 format %{ "# castDD of $dst" %}
10548 ins_encode(/* empty encoding */);
10549 ins_cost(0);
10550 ins_pipe(empty);
10551 %}
10552
10553 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
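// All of these emit "lock cmpxchg", whose semantics are roughly (rax is the
// implicit comparand):
//   if (*mem == rax) { *mem = newval; ZF = 1; } else { rax = *mem; ZF = 0; }
// The CompareAndSwap rules then materialize the boolean result from ZF via
// setcc; the CompareAndExchange rules return the witnessed value in rax.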
10554 instruct compareAndSwapP(rRegI res,
10555 memory mem_ptr,
10556 rax_RegP oldval, rRegP newval,
10557 rFlagsReg cr)
10558 %{
10559 predicate(n->as_LoadStore()->barrier_data() == 0);
10560 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10561 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10562 effect(KILL cr, KILL oldval);
10563
10564 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10565 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10566 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10567 ins_encode %{
10568 __ lock();
10569 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10570 __ setcc(Assembler::equal, $res$$Register);
10571 %}
10572 ins_pipe( pipe_cmpxchg );
10573 %}
10574
10575 instruct compareAndSwapL(rRegI res,
10576 memory mem_ptr,
10577 rax_RegL oldval, rRegL newval,
10578 rFlagsReg cr)
10579 %{
10580 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10581 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10582 effect(KILL cr, KILL oldval);
10583
10584 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10585 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10586 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10587 ins_encode %{
10588 __ lock();
10589 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10590 __ setcc(Assembler::equal, $res$$Register);
10591 %}
10592 ins_pipe( pipe_cmpxchg );
10593 %}
10594
10595 instruct compareAndSwapI(rRegI res,
10596 memory mem_ptr,
10597 rax_RegI oldval, rRegI newval,
10598 rFlagsReg cr)
10599 %{
10600 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10601 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10602 effect(KILL cr, KILL oldval);
10603
10604 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10605 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10606 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10607 ins_encode %{
10608 __ lock();
10609 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10610 __ setcc(Assembler::equal, $res$$Register);
10611 %}
10612 ins_pipe( pipe_cmpxchg );
10613 %}
10614
10615 instruct compareAndSwapB(rRegI res,
10616 memory mem_ptr,
10617 rax_RegI oldval, rRegI newval,
10618 rFlagsReg cr)
10619 %{
10620 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10621 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10622 effect(KILL cr, KILL oldval);
10623
10624 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10625 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10626 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10627 ins_encode %{
10628 __ lock();
10629 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10630 __ setcc(Assembler::equal, $res$$Register);
10631 %}
10632 ins_pipe( pipe_cmpxchg );
10633 %}
10634
10635 instruct compareAndSwapS(rRegI res,
10636 memory mem_ptr,
10637 rax_RegI oldval, rRegI newval,
10638 rFlagsReg cr)
10639 %{
10640 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10641 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10642 effect(KILL cr, KILL oldval);
10643
10644 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10645 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10646 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10647 ins_encode %{
10648 __ lock();
10649 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10650 __ setcc(Assembler::equal, $res$$Register);
10651 %}
10652 ins_pipe( pipe_cmpxchg );
10653 %}
10654
10655 instruct compareAndSwapN(rRegI res,
10656 memory mem_ptr,
10657 rax_RegN oldval, rRegN newval,
10658 rFlagsReg cr) %{
10659 predicate(n->as_LoadStore()->barrier_data() == 0);
10660 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10661 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10662 effect(KILL cr, KILL oldval);
10663
10664 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10665 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10666 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10667 ins_encode %{
10668 __ lock();
10669 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10670 __ setcc(Assembler::equal, $res$$Register);
10671 %}
10672 ins_pipe( pipe_cmpxchg );
10673 %}
10674
10675 instruct compareAndExchangeB(
10676 memory mem_ptr,
10677 rax_RegI oldval, rRegI newval,
10678 rFlagsReg cr)
10679 %{
10680 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10681 effect(KILL cr);
10682
10683 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10684 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10685 ins_encode %{
10686 __ lock();
10687 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10688 %}
10689 ins_pipe( pipe_cmpxchg );
10690 %}
10691
10692 instruct compareAndExchangeS(
10693 memory mem_ptr,
10694 rax_RegI oldval, rRegI newval,
10695 rFlagsReg cr)
10696 %{
10697 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10698 effect(KILL cr);
10699
10700 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10701 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10702 ins_encode %{
10703 __ lock();
10704 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10705 %}
10706 ins_pipe( pipe_cmpxchg );
10707 %}
10708
10709 instruct compareAndExchangeI(
10710 memory mem_ptr,
10711 rax_RegI oldval, rRegI newval,
10712 rFlagsReg cr)
10713 %{
10714 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10715 effect(KILL cr);
10716
10717 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10718 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10719 ins_encode %{
10720 __ lock();
10721 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10722 %}
10723 ins_pipe( pipe_cmpxchg );
10724 %}
10725
10726 instruct compareAndExchangeL(
10727 memory mem_ptr,
10728 rax_RegL oldval, rRegL newval,
10729 rFlagsReg cr)
10730 %{
10731 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10732 effect(KILL cr);
10733
10734 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10735 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10736 ins_encode %{
10737 __ lock();
10738 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10739 %}
10740 ins_pipe( pipe_cmpxchg );
10741 %}
10742
10743 instruct compareAndExchangeN(
10744 memory mem_ptr,
10745 rax_RegN oldval, rRegN newval,
10746 rFlagsReg cr) %{
10747 predicate(n->as_LoadStore()->barrier_data() == 0);
10748 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10749 effect(KILL cr);
10750
10751 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10752 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10753 ins_encode %{
10754 __ lock();
10755 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10756 %}
10757 ins_pipe( pipe_cmpxchg );
10758 %}
10759
10760 instruct compareAndExchangeP(
10761 memory mem_ptr,
10762 rax_RegP oldval, rRegP newval,
10763 rFlagsReg cr)
10764 %{
10765 predicate(n->as_LoadStore()->barrier_data() == 0);
10766 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10767 effect(KILL cr);
10768
10769 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10770 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10771 ins_encode %{
10772 __ lock();
10773 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10774 %}
10775 ins_pipe( pipe_cmpxchg );
10776 %}
10777
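// GetAndAdd: when the old value is not consumed (result_not_used()), a plain
// "lock add" suffices; otherwise "lock xadd" is used, which also writes the
// previous memory value back into the register operand.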
10778 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10779 predicate(n->as_LoadStore()->result_not_used());
10780 match(Set dummy (GetAndAddB mem add));
10781 effect(KILL cr);
10782 format %{ "addb_lock $mem, $add" %}
10783 ins_encode %{
10784 __ lock();
10785 __ addb($mem$$Address, $add$$Register);
10786 %}
10787 ins_pipe(pipe_cmpxchg);
10788 %}
10789
10790 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10791 predicate(n->as_LoadStore()->result_not_used());
10792 match(Set dummy (GetAndAddB mem add));
10793 effect(KILL cr);
10794 format %{ "addb_lock $mem, $add" %}
10795 ins_encode %{
10796 __ lock();
10797 __ addb($mem$$Address, $add$$constant);
10798 %}
10799 ins_pipe(pipe_cmpxchg);
10800 %}
10801
10802 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10803 predicate(!n->as_LoadStore()->result_not_used());
10804 match(Set newval (GetAndAddB mem newval));
10805 effect(KILL cr);
10806 format %{ "xaddb_lock $mem, $newval" %}
10807 ins_encode %{
10808 __ lock();
10809 __ xaddb($mem$$Address, $newval$$Register);
10810 %}
10811 ins_pipe(pipe_cmpxchg);
10812 %}
10813
10814 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10815 predicate(n->as_LoadStore()->result_not_used());
10816 match(Set dummy (GetAndAddS mem add));
10817 effect(KILL cr);
10818 format %{ "addw_lock $mem, $add" %}
10819 ins_encode %{
10820 __ lock();
10821 __ addw($mem$$Address, $add$$Register);
10822 %}
10823 ins_pipe(pipe_cmpxchg);
10824 %}
10825
10826 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10827 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10828 match(Set dummy (GetAndAddS mem add));
10829 effect(KILL cr);
10830 format %{ "addw_lock $mem, $add" %}
10831 ins_encode %{
10832 __ lock();
10833 __ addw($mem$$Address, $add$$constant);
10834 %}
10835 ins_pipe(pipe_cmpxchg);
10836 %}
10837
10838 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10839 predicate(!n->as_LoadStore()->result_not_used());
10840 match(Set newval (GetAndAddS mem newval));
10841 effect(KILL cr);
10842 format %{ "xaddw_lock $mem, $newval" %}
10843 ins_encode %{
10844 __ lock();
10845 __ xaddw($mem$$Address, $newval$$Register);
10846 %}
10847 ins_pipe(pipe_cmpxchg);
10848 %}
10849
10850 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10851 predicate(n->as_LoadStore()->result_not_used());
10852 match(Set dummy (GetAndAddI mem add));
10853 effect(KILL cr);
10854 format %{ "addl_lock $mem, $add" %}
10855 ins_encode %{
10856 __ lock();
10857 __ addl($mem$$Address, $add$$Register);
10858 %}
10859 ins_pipe(pipe_cmpxchg);
10860 %}
10861
10862 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10863 predicate(n->as_LoadStore()->result_not_used());
10864 match(Set dummy (GetAndAddI mem add));
10865 effect(KILL cr);
10866 format %{ "addl_lock $mem, $add" %}
10867 ins_encode %{
10868 __ lock();
10869 __ addl($mem$$Address, $add$$constant);
10870 %}
10871 ins_pipe(pipe_cmpxchg);
10872 %}
10873
10874 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10875 predicate(!n->as_LoadStore()->result_not_used());
10876 match(Set newval (GetAndAddI mem newval));
10877 effect(KILL cr);
10878 format %{ "xaddl_lock $mem, $newval" %}
10879 ins_encode %{
10880 __ lock();
10881 __ xaddl($mem$$Address, $newval$$Register);
10882 %}
10883 ins_pipe(pipe_cmpxchg);
10884 %}
10885
10886 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10887 predicate(n->as_LoadStore()->result_not_used());
10888 match(Set dummy (GetAndAddL mem add));
10889 effect(KILL cr);
10890 format %{ "addq_lock $mem, $add" %}
10891 ins_encode %{
10892 __ lock();
10893 __ addq($mem$$Address, $add$$Register);
10894 %}
10895 ins_pipe(pipe_cmpxchg);
10896 %}
10897
10898 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10899 predicate(n->as_LoadStore()->result_not_used());
10900 match(Set dummy (GetAndAddL mem add));
10901 effect(KILL cr);
10902 format %{ "addq_lock $mem, $add" %}
10903 ins_encode %{
10904 __ lock();
10905 __ addq($mem$$Address, $add$$constant);
10906 %}
10907 ins_pipe(pipe_cmpxchg);
10908 %}
10909
10910 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10911 predicate(!n->as_LoadStore()->result_not_used());
10912 match(Set newval (GetAndAddL mem newval));
10913 effect(KILL cr);
10914 format %{ "xaddq_lock $mem, $newval" %}
10915 ins_encode %{
10916 __ lock();
10917 __ xaddq($mem$$Address, $newval$$Register);
10918 %}
10919 ins_pipe(pipe_cmpxchg);
10920 %}
10921
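// GetAndSet: xchg with a memory operand is implicitly locked, so no lock
// prefix is needed, and it does not modify the flags, so these rules do not
// kill rFlagsReg.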
10922 instruct xchgB( memory mem, rRegI newval) %{
10923 match(Set newval (GetAndSetB mem newval));
10924 format %{ "XCHGB $newval,[$mem]" %}
10925 ins_encode %{
10926 __ xchgb($newval$$Register, $mem$$Address);
10927 %}
10928 ins_pipe( pipe_cmpxchg );
10929 %}
10930
10931 instruct xchgS( memory mem, rRegI newval) %{
10932 match(Set newval (GetAndSetS mem newval));
10933 format %{ "XCHGW $newval,[$mem]" %}
10934 ins_encode %{
10935 __ xchgw($newval$$Register, $mem$$Address);
10936 %}
10937 ins_pipe( pipe_cmpxchg );
10938 %}
10939
10940 instruct xchgI( memory mem, rRegI newval) %{
10941 match(Set newval (GetAndSetI mem newval));
10942 format %{ "XCHGL $newval,[$mem]" %}
10943 ins_encode %{
10944 __ xchgl($newval$$Register, $mem$$Address);
10945 %}
10946 ins_pipe( pipe_cmpxchg );
10947 %}
10948
10949 instruct xchgL( memory mem, rRegL newval) %{
10950 match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ $newval,[$mem]" %}
10952 ins_encode %{
10953 __ xchgq($newval$$Register, $mem$$Address);
10954 %}
10955 ins_pipe( pipe_cmpxchg );
10956 %}
10957
10958 instruct xchgP( memory mem, rRegP newval) %{
10959 match(Set newval (GetAndSetP mem newval));
10960 predicate(n->as_LoadStore()->barrier_data() == 0);
10961 format %{ "XCHGQ $newval,[$mem]" %}
10962 ins_encode %{
10963 __ xchgq($newval$$Register, $mem$$Address);
10964 %}
10965 ins_pipe( pipe_cmpxchg );
10966 %}
10967
10968 instruct xchgN( memory mem, rRegN newval) %{
10969 predicate(n->as_LoadStore()->barrier_data() == 0);
10970 match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
10972 ins_encode %{
10973 __ xchgl($newval$$Register, $mem$$Address);
10974 %}
10975 ins_pipe( pipe_cmpxchg );
10976 %}
10977
10978 //----------Abs Instructions-------------------------------------------
10979
10980 // Integer Absolute Instructions
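// Branchless idiom: compute 0 - src, then conditionally replace the result
// with src if the subtraction went negative (i.e. src was positive).
// Roughly, in C:
//   int abs(int x) { int d = 0; d -= x; return d < 0 ? x : d; }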
10981 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10982 %{
10983 match(Set dst (AbsI src));
10984 effect(TEMP dst, KILL cr);
10985 format %{ "xorl $dst, $dst\t# abs int\n\t"
10986 "subl $dst, $src\n\t"
10987 "cmovll $dst, $src" %}
10988 ins_encode %{
10989 __ xorl($dst$$Register, $dst$$Register);
10990 __ subl($dst$$Register, $src$$Register);
10991 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10992 %}
10993
10994 ins_pipe(ialu_reg_reg);
10995 %}
10996
10997 // Long Absolute Instructions
10998 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10999 %{
11000 match(Set dst (AbsL src));
11001 effect(TEMP dst, KILL cr);
11002 format %{ "xorl $dst, $dst\t# abs long\n\t"
11003 "subq $dst, $src\n\t"
11004 "cmovlq $dst, $src" %}
11005 ins_encode %{
11006 __ xorl($dst$$Register, $dst$$Register);
11007 __ subq($dst$$Register, $src$$Register);
11008 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11009 %}
11010
11011 ins_pipe(ialu_reg_reg);
11012 %}
11013
11014 //----------Subtraction Instructions-------------------------------------------
11015
11016 // Integer Subtraction Instructions
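// The "_ndd" variants use the APX new-data-destination (NDD) encodings
// (esubl/esubq and friends), which take a separate destination register and
// so drop the classic two-operand constraint that dst must equal src1.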
11017 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11018 %{
11019 predicate(!UseAPX);
11020 match(Set dst (SubI dst src));
11021 effect(KILL cr);
11022 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11023
11024 format %{ "subl $dst, $src\t# int" %}
11025 ins_encode %{
11026 __ subl($dst$$Register, $src$$Register);
11027 %}
11028 ins_pipe(ialu_reg_reg);
11029 %}
11030
11031 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11032 %{
11033 predicate(UseAPX);
11034 match(Set dst (SubI src1 src2));
11035 effect(KILL cr);
11036 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11037
11038 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11039 ins_encode %{
11040 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11041 %}
11042 ins_pipe(ialu_reg_reg);
11043 %}
11044
11045 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11046 %{
11047 predicate(UseAPX);
11048 match(Set dst (SubI src1 src2));
11049 effect(KILL cr);
11050 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11051
11052 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11053 ins_encode %{
11054 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11055 %}
11056 ins_pipe(ialu_reg_reg);
11057 %}
11058
11059 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11060 %{
11061 predicate(UseAPX);
11062 match(Set dst (SubI (LoadI src1) src2));
11063 effect(KILL cr);
11064 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11065
11066 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11067 ins_encode %{
11068 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11069 %}
11070 ins_pipe(ialu_reg_reg);
11071 %}
11072
11073 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11074 %{
11075 predicate(!UseAPX);
11076 match(Set dst (SubI dst (LoadI src)));
11077 effect(KILL cr);
11078 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11079
11080 ins_cost(150);
11081 format %{ "subl $dst, $src\t# int" %}
11082 ins_encode %{
11083 __ subl($dst$$Register, $src$$Address);
11084 %}
11085 ins_pipe(ialu_reg_mem);
11086 %}
11087
11088 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11089 %{
11090 predicate(UseAPX);
11091 match(Set dst (SubI src1 (LoadI src2)));
11092 effect(KILL cr);
11093 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11094
11095 ins_cost(150);
11096 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11097 ins_encode %{
11098 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11099 %}
11100 ins_pipe(ialu_reg_mem);
11101 %}
11102
11103 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11104 %{
11105 predicate(UseAPX);
11106 match(Set dst (SubI (LoadI src1) src2));
11107 effect(KILL cr);
11108 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11109
11110 ins_cost(150);
11111 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11112 ins_encode %{
11113 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11114 %}
11115 ins_pipe(ialu_reg_mem);
11116 %}
11117
11118 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11119 %{
11120 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11121 effect(KILL cr);
11122 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11123
11124 ins_cost(150);
11125 format %{ "subl $dst, $src\t# int" %}
11126 ins_encode %{
11127 __ subl($dst$$Address, $src$$Register);
11128 %}
11129 ins_pipe(ialu_mem_reg);
11130 %}
11131
11132 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11133 %{
11134 predicate(!UseAPX);
11135 match(Set dst (SubL dst src));
11136 effect(KILL cr);
11137 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11138
11139 format %{ "subq $dst, $src\t# long" %}
11140 ins_encode %{
11141 __ subq($dst$$Register, $src$$Register);
11142 %}
11143 ins_pipe(ialu_reg_reg);
11144 %}
11145
11146 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11147 %{
11148 predicate(UseAPX);
11149 match(Set dst (SubL src1 src2));
11150 effect(KILL cr);
11151 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11152
11153 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11154 ins_encode %{
11155 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11156 %}
11157 ins_pipe(ialu_reg_reg);
11158 %}
11159
11160 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11161 %{
11162 predicate(UseAPX);
11163 match(Set dst (SubL src1 src2));
11164 effect(KILL cr);
11165 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11166
11167 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11168 ins_encode %{
11169 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11170 %}
11171 ins_pipe(ialu_reg_reg);
11172 %}
11173
11174 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11175 %{
11176 predicate(UseAPX);
11177 match(Set dst (SubL (LoadL src1) src2));
11178 effect(KILL cr);
11179 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11180
11181 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11182 ins_encode %{
11183 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11184 %}
11185 ins_pipe(ialu_reg_reg);
11186 %}
11187
11188 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11189 %{
11190 predicate(!UseAPX);
11191 match(Set dst (SubL dst (LoadL src)));
11192 effect(KILL cr);
11193 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11194
11195 ins_cost(150);
11196 format %{ "subq $dst, $src\t# long" %}
11197 ins_encode %{
11198 __ subq($dst$$Register, $src$$Address);
11199 %}
11200 ins_pipe(ialu_reg_mem);
11201 %}
11202
11203 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11204 %{
11205 predicate(UseAPX);
11206 match(Set dst (SubL src1 (LoadL src2)));
11207 effect(KILL cr);
11208 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11209
11210 ins_cost(150);
11211 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11212 ins_encode %{
11213 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11214 %}
11215 ins_pipe(ialu_reg_mem);
11216 %}
11217
11218 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11219 %{
11220 predicate(UseAPX);
11221 match(Set dst (SubL (LoadL src1) src2));
11222 effect(KILL cr);
11223 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11224
11225 ins_cost(150);
11226 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11227 ins_encode %{
11228 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11229 %}
11230 ins_pipe(ialu_reg_mem);
11231 %}
11232
11233 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11234 %{
11235 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11236 effect(KILL cr);
11237 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11238
11239 ins_cost(150);
11240 format %{ "subq $dst, $src\t# long" %}
11241 ins_encode %{
11242 __ subq($dst$$Address, $src$$Register);
11243 %}
11244 ins_pipe(ialu_mem_reg);
11245 %}
11246
11247 // Subtract from a pointer
11248 // XXX hmpf???
11249 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11250 %{
11251 match(Set dst (AddP dst (SubI zero src)));
11252 effect(KILL cr);
11253
11254 format %{ "subq $dst, $src\t# ptr - int" %}
11255 ins_encode %{
11256 __ subq($dst$$Register, $src$$Register);
11257 %}
11258 ins_pipe(ialu_reg_reg);
11259 %}
11260
11261 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11262 %{
11263 predicate(!UseAPX);
11264 match(Set dst (SubI zero dst));
11265 effect(KILL cr);
11266 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11267
11268 format %{ "negl $dst\t# int" %}
11269 ins_encode %{
11270 __ negl($dst$$Register);
11271 %}
11272 ins_pipe(ialu_reg);
11273 %}
11274
11275 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11276 %{
11277 predicate(UseAPX);
11278 match(Set dst (SubI zero src));
11279 effect(KILL cr);
11280 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11281
11282 format %{ "enegl $dst, $src\t# int ndd" %}
11283 ins_encode %{
11284 __ enegl($dst$$Register, $src$$Register, false);
11285 %}
11286 ins_pipe(ialu_reg);
11287 %}
11288
11289 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11290 %{
11291 predicate(!UseAPX);
11292 match(Set dst (NegI dst));
11293 effect(KILL cr);
11294 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11295
11296 format %{ "negl $dst\t# int" %}
11297 ins_encode %{
11298 __ negl($dst$$Register);
11299 %}
11300 ins_pipe(ialu_reg);
11301 %}
11302
11303 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11304 %{
11305 predicate(UseAPX);
11306 match(Set dst (NegI src));
11307 effect(KILL cr);
11308 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11309
11310 format %{ "enegl $dst, $src\t# int ndd" %}
11311 ins_encode %{
11312 __ enegl($dst$$Register, $src$$Register, false);
11313 %}
11314 ins_pipe(ialu_reg);
11315 %}
11316
11317 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11318 %{
11319 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11320 effect(KILL cr);
11321 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11322
11323 format %{ "negl $dst\t# int" %}
11324 ins_encode %{
11325 __ negl($dst$$Address);
11326 %}
11327 ins_pipe(ialu_reg);
11328 %}
11329
11330 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11331 %{
11332 predicate(!UseAPX);
11333 match(Set dst (SubL zero dst));
11334 effect(KILL cr);
11335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11336
11337 format %{ "negq $dst\t# long" %}
11338 ins_encode %{
11339 __ negq($dst$$Register);
11340 %}
11341 ins_pipe(ialu_reg);
11342 %}
11343
11344 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11345 %{
11346 predicate(UseAPX);
11347 match(Set dst (SubL zero src));
11348 effect(KILL cr);
11349 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11350
11351 format %{ "enegq $dst, $src\t# long ndd" %}
11352 ins_encode %{
11353 __ enegq($dst$$Register, $src$$Register, false);
11354 %}
11355 ins_pipe(ialu_reg);
11356 %}
11357
11358 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11359 %{
11360 predicate(!UseAPX);
11361 match(Set dst (NegL dst));
11362 effect(KILL cr);
11363 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11364
  format %{ "negq $dst\t# long" %}
11366 ins_encode %{
11367 __ negq($dst$$Register);
11368 %}
11369 ins_pipe(ialu_reg);
11370 %}
11371
11372 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11373 %{
11374 predicate(UseAPX);
11375 match(Set dst (NegL src));
11376 effect(KILL cr);
11377 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11378
11379 format %{ "enegq $dst, $src\t# long ndd" %}
11380 ins_encode %{
11381 __ enegq($dst$$Register, $src$$Register, false);
11382 %}
11383 ins_pipe(ialu_reg);
11384 %}
11385
11386 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11387 %{
11388 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391
11392 format %{ "negq $dst\t# long" %}
11393 ins_encode %{
11394 __ negq($dst$$Address);
11395 %}
11396 ins_pipe(ialu_reg);
11397 %}
11398
11399 //----------Multiplication/Division Instructions-------------------------------
11400 // Integer Multiplication Instructions
11401 // Multiply Register
11402
11403 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11404 %{
11405 predicate(!UseAPX);
11406 match(Set dst (MulI dst src));
11407 effect(KILL cr);
11408
11409 ins_cost(300);
11410 format %{ "imull $dst, $src\t# int" %}
11411 ins_encode %{
11412 __ imull($dst$$Register, $src$$Register);
11413 %}
11414 ins_pipe(ialu_reg_reg_alu0);
11415 %}
11416
11417 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11418 %{
11419 predicate(UseAPX);
11420 match(Set dst (MulI src1 src2));
11421 effect(KILL cr);
11422
11423 ins_cost(300);
11424 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11425 ins_encode %{
11426 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11427 %}
11428 ins_pipe(ialu_reg_reg_alu0);
11429 %}
11430
11431 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11432 %{
11433 match(Set dst (MulI src imm));
11434 effect(KILL cr);
11435
11436 ins_cost(300);
11437 format %{ "imull $dst, $src, $imm\t# int" %}
11438 ins_encode %{
11439 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11440 %}
11441 ins_pipe(ialu_reg_reg_alu0);
11442 %}
11443
11444 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11445 %{
11446 predicate(!UseAPX);
11447 match(Set dst (MulI dst (LoadI src)));
11448 effect(KILL cr);
11449
11450 ins_cost(350);
11451 format %{ "imull $dst, $src\t# int" %}
11452 ins_encode %{
11453 __ imull($dst$$Register, $src$$Address);
11454 %}
11455 ins_pipe(ialu_reg_mem_alu0);
11456 %}
11457
11458 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11459 %{
11460 predicate(UseAPX);
11461 match(Set dst (MulI src1 (LoadI src2)));
11462 effect(KILL cr);
11463
11464 ins_cost(350);
11465 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11466 ins_encode %{
11467 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11468 %}
11469 ins_pipe(ialu_reg_mem_alu0);
11470 %}
11471
11472 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11473 %{
11474 match(Set dst (MulI (LoadI src) imm));
11475 effect(KILL cr);
11476
11477 ins_cost(300);
11478 format %{ "imull $dst, $src, $imm\t# int" %}
11479 ins_encode %{
11480 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11481 %}
11482 ins_pipe(ialu_reg_mem_alu0);
11483 %}
11484
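// MulAddS2I computes dst * src1 + src2 * src3, expanding into the imull
// rules above plus an integer add.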
11485 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11486 %{
11487 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11488 effect(KILL cr, KILL src2);
11489
11490 expand %{ mulI_rReg(dst, src1, cr);
11491 mulI_rReg(src2, src3, cr);
11492 addI_rReg(dst, src2, cr); %}
11493 %}
11494
11495 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11496 %{
11497 predicate(!UseAPX);
11498 match(Set dst (MulL dst src));
11499 effect(KILL cr);
11500
11501 ins_cost(300);
11502 format %{ "imulq $dst, $src\t# long" %}
11503 ins_encode %{
11504 __ imulq($dst$$Register, $src$$Register);
11505 %}
11506 ins_pipe(ialu_reg_reg_alu0);
11507 %}
11508
11509 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11510 %{
11511 predicate(UseAPX);
11512 match(Set dst (MulL src1 src2));
11513 effect(KILL cr);
11514
11515 ins_cost(300);
11516 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11517 ins_encode %{
11518 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11519 %}
11520 ins_pipe(ialu_reg_reg_alu0);
11521 %}
11522
11523 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11524 %{
11525 match(Set dst (MulL src imm));
11526 effect(KILL cr);
11527
11528 ins_cost(300);
11529 format %{ "imulq $dst, $src, $imm\t# long" %}
11530 ins_encode %{
11531 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11532 %}
11533 ins_pipe(ialu_reg_reg_alu0);
11534 %}
11535
11536 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11537 %{
11538 predicate(!UseAPX);
11539 match(Set dst (MulL dst (LoadL src)));
11540 effect(KILL cr);
11541
11542 ins_cost(350);
11543 format %{ "imulq $dst, $src\t# long" %}
11544 ins_encode %{
11545 __ imulq($dst$$Register, $src$$Address);
11546 %}
11547 ins_pipe(ialu_reg_mem_alu0);
11548 %}
11549
11550 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11551 %{
11552 predicate(UseAPX);
11553 match(Set dst (MulL src1 (LoadL src2)));
11554 effect(KILL cr);
11555
11556 ins_cost(350);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11558 ins_encode %{
11559 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11560 %}
11561 ins_pipe(ialu_reg_mem_alu0);
11562 %}
11563
11564 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11565 %{
11566 match(Set dst (MulL (LoadL src) imm));
11567 effect(KILL cr);
11568
11569 ins_cost(300);
11570 format %{ "imulq $dst, $src, $imm\t# long" %}
11571 ins_encode %{
11572 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11573 %}
11574 ins_pipe(ialu_reg_mem_alu0);
11575 %}
11576
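// The high-half multiplies use the one-operand imulq/mulq forms, which leave
// the full 128-bit product in RDX:RAX; hence dst is pinned to RDX and RAX is
// an implicit input that gets clobbered.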
11577 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11578 %{
11579 match(Set dst (MulHiL src rax));
11580 effect(USE_KILL rax, KILL cr);
11581
11582 ins_cost(300);
11583 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11584 ins_encode %{
11585 __ imulq($src$$Register);
11586 %}
11587 ins_pipe(ialu_reg_reg_alu0);
11588 %}
11589
11590 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11591 %{
11592 match(Set dst (UMulHiL src rax));
11593 effect(USE_KILL rax, KILL cr);
11594
11595 ins_cost(300);
11596 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11597 ins_encode %{
11598 __ mulq($src$$Register);
11599 %}
11600 ins_pipe(ialu_reg_reg_alu0);
11601 %}
11602
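// Signed divide/remainder: idiv raises a divide error on min_jint/min_jlong
// divided by -1, so the encodings below branch around that case and produce
// the Java-specified result directly. Roughly, for the int case:
//   if (x == min_jint && y == -1) { q = x; r = 0; }
//   else                          { q = x / y; r = x % y; }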
11603 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11604 rFlagsReg cr)
11605 %{
11606 match(Set rax (DivI rax div));
11607 effect(KILL rdx, KILL cr);
11608
11609 ins_cost(30*100+10*100); // XXX
11610 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11611 "jne,s normal\n\t"
11612 "xorl rdx, rdx\n\t"
11613 "cmpl $div, -1\n\t"
11614 "je,s done\n"
11615 "normal: cdql\n\t"
11616 "idivl $div\n"
11617 "done:" %}
11618 ins_encode(cdql_enc(div));
11619 ins_pipe(ialu_reg_reg_alu0);
11620 %}
11621
11622 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11623 rFlagsReg cr)
11624 %{
11625 match(Set rax (DivL rax div));
11626 effect(KILL rdx, KILL cr);
11627
11628 ins_cost(30*100+10*100); // XXX
11629 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11630 "cmpq rax, rdx\n\t"
11631 "jne,s normal\n\t"
11632 "xorl rdx, rdx\n\t"
11633 "cmpq $div, -1\n\t"
11634 "je,s done\n"
11635 "normal: cdqq\n\t"
11636 "idivq $div\n"
11637 "done:" %}
11638 ins_encode(cdqq_enc(div));
11639 ins_pipe(ialu_reg_reg_alu0);
11640 %}
11641
11642 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11643 %{
11644 match(Set rax (UDivI rax div));
11645 effect(KILL rdx, KILL cr);
11646
11647 ins_cost(300);
11648 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11649 ins_encode %{
11650 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11651 %}
11652 ins_pipe(ialu_reg_reg_alu0);
11653 %}
11654
11655 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11656 %{
11657 match(Set rax (UDivL rax div));
11658 effect(KILL rdx, KILL cr);
11659
11660 ins_cost(300);
11661 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11662 ins_encode %{
11663 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11664 %}
11665 ins_pipe(ialu_reg_reg_alu0);
11666 %}
11667
11668 // Integer DIVMOD with Register, both quotient and mod results
11669 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11670 rFlagsReg cr)
11671 %{
11672 match(DivModI rax div);
11673 effect(KILL cr);
11674
11675 ins_cost(30*100+10*100); // XXX
11676 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11677 "jne,s normal\n\t"
11678 "xorl rdx, rdx\n\t"
11679 "cmpl $div, -1\n\t"
11680 "je,s done\n"
11681 "normal: cdql\n\t"
11682 "idivl $div\n"
11683 "done:" %}
11684 ins_encode(cdql_enc(div));
11685 ins_pipe(pipe_slow);
11686 %}
11687
11688 // Long DIVMOD with Register, both quotient and mod results
11689 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11690 rFlagsReg cr)
11691 %{
11692 match(DivModL rax div);
11693 effect(KILL cr);
11694
11695 ins_cost(30*100+10*100); // XXX
11696 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11697 "cmpq rax, rdx\n\t"
11698 "jne,s normal\n\t"
11699 "xorl rdx, rdx\n\t"
11700 "cmpq $div, -1\n\t"
11701 "je,s done\n"
11702 "normal: cdqq\n\t"
11703 "idivq $div\n"
11704 "done:" %}
11705 ins_encode(cdqq_enc(div));
11706 ins_pipe(pipe_slow);
11707 %}
11708
11709 // Unsigned integer DIVMOD with Register, both quotient and mod results
11710 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11711 no_rax_rdx_RegI div, rFlagsReg cr)
11712 %{
11713 match(UDivModI rax div);
11714 effect(TEMP tmp, KILL cr);
11715
11716 ins_cost(300);
11717 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11718 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11719 %}
11720 ins_encode %{
11721 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11722 %}
11723 ins_pipe(pipe_slow);
11724 %}
11725
11726 // Unsigned long DIVMOD with Register, both quotient and mod results
11727 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11728 no_rax_rdx_RegL div, rFlagsReg cr)
11729 %{
11730 match(UDivModL rax div);
11731 effect(TEMP tmp, KILL cr);
11732
11733 ins_cost(300);
11734 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11735 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11736 %}
11737 ins_encode %{
11738 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11739 %}
11740 ins_pipe(pipe_slow);
11741 %}
11742
11743 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11744 rFlagsReg cr)
11745 %{
11746 match(Set rdx (ModI rax div));
11747 effect(KILL rax, KILL cr);
11748
11749 ins_cost(300); // XXX
11750 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11751 "jne,s normal\n\t"
11752 "xorl rdx, rdx\n\t"
11753 "cmpl $div, -1\n\t"
11754 "je,s done\n"
11755 "normal: cdql\n\t"
11756 "idivl $div\n"
11757 "done:" %}
11758 ins_encode(cdql_enc(div));
11759 ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761
11762 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11763 rFlagsReg cr)
11764 %{
11765 match(Set rdx (ModL rax div));
11766 effect(KILL rax, KILL cr);
11767
11768 ins_cost(300); // XXX
11769 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11770 "cmpq rax, rdx\n\t"
11771 "jne,s normal\n\t"
11772 "xorl rdx, rdx\n\t"
11773 "cmpq $div, -1\n\t"
11774 "je,s done\n"
11775 "normal: cdqq\n\t"
11776 "idivq $div\n"
11777 "done:" %}
11778 ins_encode(cdqq_enc(div));
11779 ins_pipe(ialu_reg_reg_alu0);
11780 %}
11781
11782 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11783 %{
11784 match(Set rdx (UModI rax div));
11785 effect(KILL rax, KILL cr);
11786
11787 ins_cost(300);
11788 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11789 ins_encode %{
11790 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11791 %}
11792 ins_pipe(ialu_reg_reg_alu0);
11793 %}
11794
11795 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11796 %{
11797 match(Set rdx (UModL rax div));
11798 effect(KILL rax, KILL cr);
11799
11800 ins_cost(300);
11801 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11802 ins_encode %{
11803 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11804 %}
11805 ins_pipe(ialu_reg_reg_alu0);
11806 %}
11807
11808 // Integer Shift Instructions
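// Variable-count shifts use the implicit CL count register unless BMI2 is
// available, in which case shlx/sarx/shrx take the count in any register and
// leave the flags untouched (hence no rFlagsReg kill in those rules).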
11809 // Shift Left by one, two, three
11810 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11811 %{
11812 predicate(!UseAPX);
11813 match(Set dst (LShiftI dst shift));
11814 effect(KILL cr);
11815
11816 format %{ "sall $dst, $shift" %}
11817 ins_encode %{
11818 __ sall($dst$$Register, $shift$$constant);
11819 %}
11820 ins_pipe(ialu_reg);
11821 %}
11822
11823 // Shift Left by one, two, three
11824 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11825 %{
11826 predicate(UseAPX);
11827 match(Set dst (LShiftI src shift));
11828 effect(KILL cr);
11829
  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11831 ins_encode %{
11832 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11833 %}
11834 ins_pipe(ialu_reg);
11835 %}
11836
11837 // Shift Left by 8-bit immediate
11838 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11839 %{
11840 predicate(!UseAPX);
11841 match(Set dst (LShiftI dst shift));
11842 effect(KILL cr);
11843
11844 format %{ "sall $dst, $shift" %}
11845 ins_encode %{
11846 __ sall($dst$$Register, $shift$$constant);
11847 %}
11848 ins_pipe(ialu_reg);
11849 %}
11850
11851 // Shift Left by 8-bit immediate
11852 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11853 %{
11854 predicate(UseAPX);
11855 match(Set dst (LShiftI src shift));
11856 effect(KILL cr);
11857
11858 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11859 ins_encode %{
11860 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11861 %}
11862 ins_pipe(ialu_reg);
11863 %}
11864
11865 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11866 %{
11867 predicate(UseAPX);
11868 match(Set dst (LShiftI (LoadI src) shift));
11869 effect(KILL cr);
11870
11871 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11872 ins_encode %{
11873 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11874 %}
11875 ins_pipe(ialu_reg);
11876 %}
11877
11878 // Shift Left by 8-bit immediate
11879 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11880 %{
11881 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11882 effect(KILL cr);
11883
11884 format %{ "sall $dst, $shift" %}
11885 ins_encode %{
11886 __ sall($dst$$Address, $shift$$constant);
11887 %}
11888 ins_pipe(ialu_mem_imm);
11889 %}
11890
11891 // Shift Left by variable
11892 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11893 %{
11894 predicate(!VM_Version::supports_bmi2());
11895 match(Set dst (LShiftI dst shift));
11896 effect(KILL cr);
11897
11898 format %{ "sall $dst, $shift" %}
11899 ins_encode %{
11900 __ sall($dst$$Register);
11901 %}
11902 ins_pipe(ialu_reg_reg);
11903 %}
11904
11905 // Shift Left by variable
11906 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11907 %{
11908 predicate(!VM_Version::supports_bmi2());
11909 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11910 effect(KILL cr);
11911
11912 format %{ "sall $dst, $shift" %}
11913 ins_encode %{
11914 __ sall($dst$$Address);
11915 %}
11916 ins_pipe(ialu_mem_reg);
11917 %}
11918
11919 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11920 %{
11921 predicate(VM_Version::supports_bmi2());
11922 match(Set dst (LShiftI src shift));
11923
11924 format %{ "shlxl $dst, $src, $shift" %}
11925 ins_encode %{
11926 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11927 %}
11928 ins_pipe(ialu_reg_reg);
11929 %}
11930
11931 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11932 %{
11933 predicate(VM_Version::supports_bmi2());
11934 match(Set dst (LShiftI (LoadI src) shift));
11935 ins_cost(175);
11936 format %{ "shlxl $dst, $src, $shift" %}
11937 ins_encode %{
11938 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11939 %}
11940 ins_pipe(ialu_reg_mem);
11941 %}
11942
11943 // Arithmetic Shift Right by 8-bit immediate
11944 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11945 %{
11946 predicate(!UseAPX);
11947 match(Set dst (RShiftI dst shift));
11948 effect(KILL cr);
11949
11950 format %{ "sarl $dst, $shift" %}
11951 ins_encode %{
11952 __ sarl($dst$$Register, $shift$$constant);
11953 %}
11954 ins_pipe(ialu_mem_imm);
11955 %}
11956
11957 // Arithmetic Shift Right by 8-bit immediate
11958 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11959 %{
11960 predicate(UseAPX);
11961 match(Set dst (RShiftI src shift));
11962 effect(KILL cr);
11963
11964 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11965 ins_encode %{
11966 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11967 %}
11968 ins_pipe(ialu_mem_imm);
11969 %}
11970
11971 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11972 %{
11973 predicate(UseAPX);
11974 match(Set dst (RShiftI (LoadI src) shift));
11975 effect(KILL cr);
11976
11977 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
11978 ins_encode %{
11979 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11980 %}
11981 ins_pipe(ialu_mem_imm);
11982 %}
11983
11984 // Arithmetic Shift Right by 8-bit immediate
11985 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11986 %{
11987 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11988 effect(KILL cr);
11989
11990 format %{ "sarl $dst, $shift" %}
11991 ins_encode %{
11992 __ sarl($dst$$Address, $shift$$constant);
11993 %}
11994 ins_pipe(ialu_mem_imm);
11995 %}
11996
11997 // Arithmetic Shift Right by variable
11998 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11999 %{
12000 predicate(!VM_Version::supports_bmi2());
12001 match(Set dst (RShiftI dst shift));
12002 effect(KILL cr);
12003
12004 format %{ "sarl $dst, $shift" %}
12005 ins_encode %{
12006 __ sarl($dst$$Register);
12007 %}
12008 ins_pipe(ialu_reg_reg);
12009 %}
12010
12011 // Arithmetic Shift Right by variable
12012 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12013 %{
12014 predicate(!VM_Version::supports_bmi2());
12015 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12016 effect(KILL cr);
12017
12018 format %{ "sarl $dst, $shift" %}
12019 ins_encode %{
12020 __ sarl($dst$$Address);
12021 %}
12022 ins_pipe(ialu_mem_reg);
12023 %}
12024
12025 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12026 %{
12027 predicate(VM_Version::supports_bmi2());
12028 match(Set dst (RShiftI src shift));
12029
12030 format %{ "sarxl $dst, $src, $shift" %}
12031 ins_encode %{
12032 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12033 %}
12034 ins_pipe(ialu_reg_reg);
12035 %}
12036
12037 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12038 %{
12039 predicate(VM_Version::supports_bmi2());
12040 match(Set dst (RShiftI (LoadI src) shift));
12041 ins_cost(175);
12042 format %{ "sarxl $dst, $src, $shift" %}
12043 ins_encode %{
12044 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12045 %}
12046 ins_pipe(ialu_reg_mem);
12047 %}
12048
12049 // Logical Shift Right by 8-bit immediate
12050 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12051 %{
12052 predicate(!UseAPX);
12053 match(Set dst (URShiftI dst shift));
12054 effect(KILL cr);
12055
12056 format %{ "shrl $dst, $shift" %}
12057 ins_encode %{
12058 __ shrl($dst$$Register, $shift$$constant);
12059 %}
12060 ins_pipe(ialu_reg);
12061 %}
12062
12063 // Logical Shift Right by 8-bit immediate
12064 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12065 %{
12066 predicate(UseAPX);
12067 match(Set dst (URShiftI src shift));
12068 effect(KILL cr);
12069
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12071 ins_encode %{
12072 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12073 %}
12074 ins_pipe(ialu_reg);
12075 %}
12076
12077 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12078 %{
12079 predicate(UseAPX);
12080 match(Set dst (URShiftI (LoadI src) shift));
12081 effect(KILL cr);
12082
  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12084 ins_encode %{
12085 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12086 %}
12087 ins_pipe(ialu_reg);
12088 %}
12089
12090 // Logical Shift Right by 8-bit immediate
12091 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12092 %{
12093 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12094 effect(KILL cr);
12095
12096 format %{ "shrl $dst, $shift" %}
12097 ins_encode %{
12098 __ shrl($dst$$Address, $shift$$constant);
12099 %}
12100 ins_pipe(ialu_mem_imm);
12101 %}
12102
12103 // Logical Shift Right by variable
12104 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12105 %{
12106 predicate(!VM_Version::supports_bmi2());
12107 match(Set dst (URShiftI dst shift));
12108 effect(KILL cr);
12109
12110 format %{ "shrl $dst, $shift" %}
12111 ins_encode %{
12112 __ shrl($dst$$Register);
12113 %}
12114 ins_pipe(ialu_reg_reg);
12115 %}
12116
12117 // Logical Shift Right by variable
12118 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12119 %{
12120 predicate(!VM_Version::supports_bmi2());
12121 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12122 effect(KILL cr);
12123
12124 format %{ "shrl $dst, $shift" %}
12125 ins_encode %{
12126 __ shrl($dst$$Address);
12127 %}
12128 ins_pipe(ialu_mem_reg);
12129 %}
12130
12131 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12132 %{
12133 predicate(VM_Version::supports_bmi2());
12134 match(Set dst (URShiftI src shift));
12135
12136 format %{ "shrxl $dst, $src, $shift" %}
12137 ins_encode %{
12138 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12139 %}
12140 ins_pipe(ialu_reg_reg);
12141 %}
12142
12143 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12144 %{
12145 predicate(VM_Version::supports_bmi2());
12146 match(Set dst (URShiftI (LoadI src) shift));
12147 ins_cost(175);
12148 format %{ "shrxl $dst, $src, $shift" %}
12149 ins_encode %{
12150 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12151 %}
12152 ins_pipe(ialu_reg_mem);
12153 %}
12154
12155 // Long Shift Instructions
12156 // Shift Left by one, two, three
12157 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12158 %{
12159 predicate(!UseAPX);
12160 match(Set dst (LShiftL dst shift));
12161 effect(KILL cr);
12162
12163 format %{ "salq $dst, $shift" %}
12164 ins_encode %{
12165 __ salq($dst$$Register, $shift$$constant);
12166 %}
12167 ins_pipe(ialu_reg);
12168 %}
12169
12170 // Shift Left by one, two, three
12171 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12172 %{
12173 predicate(UseAPX);
12174 match(Set dst (LShiftL src shift));
12175 effect(KILL cr);
12176
12177 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12178 ins_encode %{
12179 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12180 %}
12181 ins_pipe(ialu_reg);
12182 %}
12183
12184 // Shift Left by 8-bit immediate
12185 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12186 %{
12187 predicate(!UseAPX);
12188 match(Set dst (LShiftL dst shift));
12189 effect(KILL cr);
12190
12191 format %{ "salq $dst, $shift" %}
12192 ins_encode %{
12193 __ salq($dst$$Register, $shift$$constant);
12194 %}
12195 ins_pipe(ialu_reg);
12196 %}
12197
12198 // Shift Left by 8-bit immediate
12199 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12200 %{
12201 predicate(UseAPX);
12202 match(Set dst (LShiftL src shift));
12203 effect(KILL cr);
12204
12205 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12206 ins_encode %{
12207 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12208 %}
12209 ins_pipe(ialu_reg);
12210 %}
12211
12212 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12213 %{
12214 predicate(UseAPX);
12215 match(Set dst (LShiftL (LoadL src) shift));
12216 effect(KILL cr);
12217
12218 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12219 ins_encode %{
12220 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12221 %}
12222 ins_pipe(ialu_reg);
12223 %}
12224
12225 // Shift Left by 8-bit immediate
12226 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12227 %{
12228 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12229 effect(KILL cr);
12230
12231 format %{ "salq $dst, $shift" %}
12232 ins_encode %{
12233 __ salq($dst$$Address, $shift$$constant);
12234 %}
12235 ins_pipe(ialu_mem_imm);
12236 %}
12237
12238 // Shift Left by variable
12239 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12240 %{
12241 predicate(!VM_Version::supports_bmi2());
12242 match(Set dst (LShiftL dst shift));
12243 effect(KILL cr);
12244
12245 format %{ "salq $dst, $shift" %}
12246 ins_encode %{
12247 __ salq($dst$$Register);
12248 %}
12249 ins_pipe(ialu_reg_reg);
12250 %}
12251
12252 // Shift Left by variable
12253 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12254 %{
12255 predicate(!VM_Version::supports_bmi2());
12256 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12257 effect(KILL cr);
12258
12259 format %{ "salq $dst, $shift" %}
12260 ins_encode %{
12261 __ salq($dst$$Address);
12262 %}
12263 ins_pipe(ialu_mem_reg);
12264 %}
12265
12266 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12267 %{
12268 predicate(VM_Version::supports_bmi2());
12269 match(Set dst (LShiftL src shift));
12270
12271 format %{ "shlxq $dst, $src, $shift" %}
12272 ins_encode %{
12273 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12274 %}
12275 ins_pipe(ialu_reg_reg);
12276 %}
12277
12278 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12279 %{
12280 predicate(VM_Version::supports_bmi2());
12281 match(Set dst (LShiftL (LoadL src) shift));
12282 ins_cost(175);
12283 format %{ "shlxq $dst, $src, $shift" %}
12284 ins_encode %{
12285 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12286 %}
12287 ins_pipe(ialu_reg_mem);
12288 %}
12289
12290 // Arithmetic Shift Right by 8-bit immediate
12291 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12292 %{
12293 predicate(!UseAPX);
12294 match(Set dst (RShiftL dst shift));
12295 effect(KILL cr);
12296
12297 format %{ "sarq $dst, $shift" %}
12298 ins_encode %{
12299 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12300 %}
12301 ins_pipe(ialu_mem_imm);
12302 %}
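
// Java masks long shift counts to the low six bits (JLS 15.19), hence the
// "& 0x3F" applied to the immediate above; e.g. (x >> 70) on a long is the
// same as (x >> 6).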
12303
12304 // Arithmetic Shift Right by 8-bit immediate
12305 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12306 %{
12307 predicate(UseAPX);
12308 match(Set dst (RShiftL src shift));
12309 effect(KILL cr);
12310
12311 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12312 ins_encode %{
12313 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12314 %}
12315 ins_pipe(ialu_mem_imm);
12316 %}
12317
12318 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12319 %{
12320 predicate(UseAPX);
12321 match(Set dst (RShiftL (LoadL src) shift));
12322 effect(KILL cr);
12323
12324 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12325 ins_encode %{
12326 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12327 %}
12328 ins_pipe(ialu_mem_imm);
12329 %}
12330
12331 // Arithmetic Shift Right by 8-bit immediate
12332 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12333 %{
12334 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12335 effect(KILL cr);
12336
12337 format %{ "sarq $dst, $shift" %}
12338 ins_encode %{
12339 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12340 %}
12341 ins_pipe(ialu_mem_imm);
12342 %}
12343
12344 // Arithmetic Shift Right by variable
12345 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12346 %{
12347 predicate(!VM_Version::supports_bmi2());
12348 match(Set dst (RShiftL dst shift));
12349 effect(KILL cr);
12350
12351 format %{ "sarq $dst, $shift" %}
12352 ins_encode %{
12353 __ sarq($dst$$Register);
12354 %}
12355 ins_pipe(ialu_reg_reg);
12356 %}
12357
12358 // Arithmetic Shift Right by variable
12359 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12360 %{
12361 predicate(!VM_Version::supports_bmi2());
12362 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12363 effect(KILL cr);
12364
12365 format %{ "sarq $dst, $shift" %}
12366 ins_encode %{
12367 __ sarq($dst$$Address);
12368 %}
12369 ins_pipe(ialu_mem_reg);
12370 %}
12371
12372 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12373 %{
12374 predicate(VM_Version::supports_bmi2());
12375 match(Set dst (RShiftL src shift));
12376
12377 format %{ "sarxq $dst, $src, $shift" %}
12378 ins_encode %{
12379 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12380 %}
12381 ins_pipe(ialu_reg_reg);
12382 %}
12383
12384 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12385 %{
12386 predicate(VM_Version::supports_bmi2());
12387 match(Set dst (RShiftL (LoadL src) shift));
12388 ins_cost(175);
12389 format %{ "sarxq $dst, $src, $shift" %}
12390 ins_encode %{
12391 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12392 %}
12393 ins_pipe(ialu_reg_mem);
12394 %}
12395
12396 // Logical Shift Right by 8-bit immediate
12397 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12398 %{
12399 predicate(!UseAPX);
12400 match(Set dst (URShiftL dst shift));
12401 effect(KILL cr);
12402
12403 format %{ "shrq $dst, $shift" %}
12404 ins_encode %{
12405 __ shrq($dst$$Register, $shift$$constant);
12406 %}
12407 ins_pipe(ialu_reg);
12408 %}
12409
12410 // Logical Shift Right by 8-bit immediate
12411 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12412 %{
12413 predicate(UseAPX);
12414 match(Set dst (URShiftL src shift));
12415 effect(KILL cr);
12416
12417 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12418 ins_encode %{
12419 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12420 %}
12421 ins_pipe(ialu_reg);
12422 %}
12423
12424 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12425 %{
12426 predicate(UseAPX);
12427 match(Set dst (URShiftL (LoadL src) shift));
12428 effect(KILL cr);
12429
12430 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12431 ins_encode %{
12432 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12433 %}
12434 ins_pipe(ialu_reg);
12435 %}
12436
12437 // Logical Shift Right by 8-bit immediate
12438 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12439 %{
12440 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12441 effect(KILL cr);
12442
12443 format %{ "shrq $dst, $shift" %}
12444 ins_encode %{
12445 __ shrq($dst$$Address, $shift$$constant);
12446 %}
12447 ins_pipe(ialu_mem_imm);
12448 %}
12449
12450 // Logical Shift Right by variable
12451 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12452 %{
12453 predicate(!VM_Version::supports_bmi2());
12454 match(Set dst (URShiftL dst shift));
12455 effect(KILL cr);
12456
12457 format %{ "shrq $dst, $shift" %}
12458 ins_encode %{
12459 __ shrq($dst$$Register);
12460 %}
12461 ins_pipe(ialu_reg_reg);
12462 %}
12463
12464 // Logical Shift Right by variable
12465 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12466 %{
12467 predicate(!VM_Version::supports_bmi2());
12468 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12469 effect(KILL cr);
12470
12471 format %{ "shrq $dst, $shift" %}
12472 ins_encode %{
12473 __ shrq($dst$$Address);
12474 %}
12475 ins_pipe(ialu_mem_reg);
12476 %}
12477
12478 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12479 %{
12480 predicate(VM_Version::supports_bmi2());
12481 match(Set dst (URShiftL src shift));
12482
12483 format %{ "shrxq $dst, $src, $shift" %}
12484 ins_encode %{
12485 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12486 %}
12487 ins_pipe(ialu_reg_reg);
12488 %}
12489
12490 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12491 %{
12492 predicate(VM_Version::supports_bmi2());
12493 match(Set dst (URShiftL (LoadL src) shift));
12494 ins_cost(175);
12495 format %{ "shrxq $dst, $src, $shift" %}
12496 ins_encode %{
12497 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12498 %}
12499 ins_pipe(ialu_reg_mem);
12500 %}
12501
12502 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
12503 // This idiom is used by the compiler for the i2b bytecode.
12504 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12505 %{
12506 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12507
12508 format %{ "movsbl $dst, $src\t# i2b" %}
12509 ins_encode %{
12510 __ movsbl($dst$$Register, $src$$Register);
12511 %}
12512 ins_pipe(ialu_reg_reg);
12513 %}
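
// Illustrative Java source for the idiom above: a narrowing cast such as
//   byte b = (byte) i;
// is expanded to (i << 24) >> 24 and matched into a single movsbl.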
12514
12515 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
12517 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12518 %{
12519 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12520
12521 format %{ "movswl $dst, $src\t# i2s" %}
12522 ins_encode %{
12523 __ movswl($dst$$Register, $src$$Register);
12524 %}
12525 ins_pipe(ialu_reg_reg);
12526 %}
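
// Likewise, short s = (short) i; becomes (i << 16) >> 16 and is matched into
// a single movswl.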
12527
12528 // ROL/ROR instructions
12529
12530 // Rotate left by constant.
12531 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12532 %{
12533 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12534 match(Set dst (RotateLeft dst shift));
12535 effect(KILL cr);
12536 format %{ "roll $dst, $shift" %}
12537 ins_encode %{
12538 __ roll($dst$$Register, $shift$$constant);
12539 %}
12540 ins_pipe(ialu_reg);
12541 %}
12542
12543 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12544 %{
12545 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12546 match(Set dst (RotateLeft src shift));
12547 format %{ "rolxl $dst, $src, $shift" %}
12548 ins_encode %{
12549 int shift = 32 - ($shift$$constant & 31);
12550 __ rorxl($dst$$Register, $src$$Register, shift);
12551 %}
12552 ins_pipe(ialu_reg_reg);
12553 %}
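
// A rotate-left by k equals a rotate-right by (32 - k) & 31 for 32-bit values,
// e.g. Integer.rotateLeft(x, 5) == Integer.rotateRight(x, 27), so the
// three-operand rorxl can encode both directions.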
12554
12555 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12556 %{
12557 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12558 match(Set dst (RotateLeft (LoadI src) shift));
12559 ins_cost(175);
12560 format %{ "rolxl $dst, $src, $shift" %}
12561 ins_encode %{
12562 int shift = 32 - ($shift$$constant & 31);
12563 __ rorxl($dst$$Register, $src$$Address, shift);
12564 %}
12565 ins_pipe(ialu_reg_mem);
12566 %}
12567
12568 // Rotate Left by variable
12569 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12570 %{
12571 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12572 match(Set dst (RotateLeft dst shift));
12573 effect(KILL cr);
12574 format %{ "roll $dst, $shift" %}
12575 ins_encode %{
12576 __ roll($dst$$Register);
12577 %}
12578 ins_pipe(ialu_reg_reg);
12579 %}
12580
12581 // Rotate Left by variable
12582 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12583 %{
12584 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12585 match(Set dst (RotateLeft src shift));
12586 effect(KILL cr);
12587
12588 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12589 ins_encode %{
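    // The shift count is implicitly in CL (operand class rcx_RegI), so eroll
    // takes no explicit count argument.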
12590 __ eroll($dst$$Register, $src$$Register, false);
12591 %}
12592 ins_pipe(ialu_reg_reg);
12593 %}
12594
12595 // Rotate Right by constant.
12596 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12597 %{
12598 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12599 match(Set dst (RotateRight dst shift));
12600 effect(KILL cr);
12601 format %{ "rorl $dst, $shift" %}
12602 ins_encode %{
12603 __ rorl($dst$$Register, $shift$$constant);
12604 %}
12605 ins_pipe(ialu_reg);
12606 %}
12607
12608 // Rotate Right by constant.
12609 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12610 %{
12611 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12612 match(Set dst (RotateRight src shift));
12613 format %{ "rorxl $dst, $src, $shift" %}
12614 ins_encode %{
12615 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12616 %}
12617 ins_pipe(ialu_reg_reg);
12618 %}
12619
12620 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12621 %{
12622 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12623 match(Set dst (RotateRight (LoadI src) shift));
12624 ins_cost(175);
12625 format %{ "rorxl $dst, $src, $shift" %}
12626 ins_encode %{
12627 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12628 %}
12629 ins_pipe(ialu_reg_mem);
12630 %}
12631
12632 // Rotate Right by variable
12633 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12634 %{
12635 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12636 match(Set dst (RotateRight dst shift));
12637 effect(KILL cr);
12638 format %{ "rorl $dst, $shift" %}
12639 ins_encode %{
12640 __ rorl($dst$$Register);
12641 %}
12642 ins_pipe(ialu_reg_reg);
12643 %}
12644
12645 // Rotate Right by variable
12646 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12647 %{
12648 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12649 match(Set dst (RotateRight src shift));
12650 effect(KILL cr);
12651
12652 format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
12653 ins_encode %{
12654 __ erorl($dst$$Register, $src$$Register, false);
12655 %}
12656 ins_pipe(ialu_reg_reg);
12657 %}
12658
12659 // Rotate Left by constant.
12660 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12661 %{
12662 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12663 match(Set dst (RotateLeft dst shift));
12664 effect(KILL cr);
12665 format %{ "rolq $dst, $shift" %}
12666 ins_encode %{
12667 __ rolq($dst$$Register, $shift$$constant);
12668 %}
12669 ins_pipe(ialu_reg);
12670 %}
12671
12672 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12673 %{
12674 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12675 match(Set dst (RotateLeft src shift));
12676 format %{ "rolxq $dst, $src, $shift" %}
12677 ins_encode %{
12678 int shift = 64 - ($shift$$constant & 63);
12679 __ rorxq($dst$$Register, $src$$Register, shift);
12680 %}
12681 ins_pipe(ialu_reg_reg);
12682 %}
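
// Same identity as the 32-bit case: a rotate-left by k equals a rotate-right
// by (64 - k) & 63 for 64-bit values.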
12683
12684 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12685 %{
12686 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12687 match(Set dst (RotateLeft (LoadL src) shift));
12688 ins_cost(175);
12689 format %{ "rolxq $dst, $src, $shift" %}
12690 ins_encode %{
12691 int shift = 64 - ($shift$$constant & 63);
12692 __ rorxq($dst$$Register, $src$$Address, shift);
12693 %}
12694 ins_pipe(ialu_reg_mem);
12695 %}
12696
12697 // Rotate Left by variable
12698 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12699 %{
12700 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12701 match(Set dst (RotateLeft dst shift));
12702 effect(KILL cr);
12703 format %{ "rolq $dst, $shift" %}
12704 ins_encode %{
12705 __ rolq($dst$$Register);
12706 %}
12707 ins_pipe(ialu_reg_reg);
12708 %}
12709
12710 // Rotate Left by variable
12711 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12712 %{
12713 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12714 match(Set dst (RotateLeft src shift));
12715 effect(KILL cr);
12716
12717 format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
12718 ins_encode %{
12719 __ erolq($dst$$Register, $src$$Register, false);
12720 %}
12721 ins_pipe(ialu_reg_reg);
12722 %}
12723
12724 // Rotate Right by constant.
12725 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12726 %{
12727 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12728 match(Set dst (RotateRight dst shift));
12729 effect(KILL cr);
12730 format %{ "rorq $dst, $shift" %}
12731 ins_encode %{
12732 __ rorq($dst$$Register, $shift$$constant);
12733 %}
12734 ins_pipe(ialu_reg);
12735 %}
12736
12737 // Rotate Right by constant
12738 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12739 %{
12740 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12741 match(Set dst (RotateRight src shift));
12742 format %{ "rorxq $dst, $src, $shift" %}
12743 ins_encode %{
12744 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12745 %}
12746 ins_pipe(ialu_reg_reg);
12747 %}
12748
12749 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12750 %{
12751 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12752 match(Set dst (RotateRight (LoadL src) shift));
12753 ins_cost(175);
12754 format %{ "rorxq $dst, $src, $shift" %}
12755 ins_encode %{
12756 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12757 %}
12758 ins_pipe(ialu_reg_mem);
12759 %}
12760
12761 // Rotate Right by variable
12762 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12763 %{
12764 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12765 match(Set dst (RotateRight dst shift));
12766 effect(KILL cr);
12767 format %{ "rorq $dst, $shift" %}
12768 ins_encode %{
12769 __ rorq($dst$$Register);
12770 %}
12771 ins_pipe(ialu_reg_reg);
12772 %}
12773
12774 // Rotate Right by variable
12775 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12776 %{
12777 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12778 match(Set dst (RotateRight src shift));
12779 effect(KILL cr);
12780
12781 format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
12782 ins_encode %{
12783 __ erorq($dst$$Register, $src$$Register, false);
12784 %}
12785 ins_pipe(ialu_reg_reg);
12786 %}
12787
12788 //----------------------------- CompressBits/ExpandBits ------------------------
12789
12790 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12791 predicate(n->bottom_type()->isa_long());
12792 match(Set dst (CompressBits src mask));
12793 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12794 ins_encode %{
12795 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12796 %}
12797 ins_pipe( pipe_slow );
12798 %}
12799
12800 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12801 predicate(n->bottom_type()->isa_long());
12802 match(Set dst (ExpandBits src mask));
12803 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12804 ins_encode %{
12805 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12806 %}
12807 ins_pipe( pipe_slow );
12808 %}
12809
12810 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12811 predicate(n->bottom_type()->isa_long());
12812 match(Set dst (CompressBits src (LoadL mask)));
12813 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12814 ins_encode %{
12815 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12816 %}
12817 ins_pipe( pipe_slow );
12818 %}
12819
12820 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12821 predicate(n->bottom_type()->isa_long());
12822 match(Set dst (ExpandBits src (LoadL mask)));
12823 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12824 ins_encode %{
12825 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12826 %}
12827 ins_pipe( pipe_slow );
12828 %}
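
// pext packs the bits of src selected by mask into the low bits of dst, and
// pdep is its inverse. Illustrative 8-bit example:
//   pext(0b10110100, 0b01010101) = 0b0110
//   pdep(0b0110,     0b01010101) = 0b00010100
// The CompressBits/ExpandBits ideal nodes are produced for the
// Long.compress/Long.expand (and Integer.compress/expand) intrinsics.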
12829
12830
12831 // Logical Instructions
12832
12833 // Integer Logical Instructions
12834
12835 // And Instructions
12836 // And Register with Register
12837 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12838 %{
12839 predicate(!UseAPX);
12840 match(Set dst (AndI dst src));
12841 effect(KILL cr);
12842 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12843
12844 format %{ "andl $dst, $src\t# int" %}
12845 ins_encode %{
12846 __ andl($dst$$Register, $src$$Register);
12847 %}
12848 ins_pipe(ialu_reg_reg);
12849 %}
12850
12851 // And Register with Register using New Data Destination (NDD)
12852 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12853 %{
12854 predicate(UseAPX);
12855 match(Set dst (AndI src1 src2));
12856 effect(KILL cr);
12857 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12858
12859 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12860 ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12864 ins_pipe(ialu_reg_reg);
12865 %}
12866
12867 // And Register with Immediate 255
12868 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12869 %{
12870 match(Set dst (AndI src mask));
12871
12872 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12873 ins_encode %{
12874 __ movzbl($dst$$Register, $src$$Register);
12875 %}
12876 ins_pipe(ialu_reg);
12877 %}
12878
12879 // And Register with Immediate 255 and promote to long
12880 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12881 %{
12882 match(Set dst (ConvI2L (AndI src mask)));
12883
12884 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12885 ins_encode %{
12886 __ movzbl($dst$$Register, $src$$Register);
12887 %}
12888 ins_pipe(ialu_reg);
12889 %}
12890
12891 // And Register with Immediate 65535
12892 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12893 %{
12894 match(Set dst (AndI src mask));
12895
12896 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12897 ins_encode %{
12898 __ movzwl($dst$$Register, $src$$Register);
12899 %}
12900 ins_pipe(ialu_reg);
12901 %}
12902
12903 // And Register with Immediate 65535 and promote to long
12904 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12905 %{
12906 match(Set dst (ConvI2L (AndI src mask)));
12907
12908 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12909 ins_encode %{
12910 __ movzwl($dst$$Register, $src$$Register);
12911 %}
12912 ins_pipe(ialu_reg);
12913 %}
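
// The 0xFF and 0xFFFF masks above are implemented as zero-extending moves;
// movzx does not touch the flags, which is why these patterns have no KILL cr
// effect. E.g. int lo = x & 0xFF; compiles to a single movzbl.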
12914
// The int-to-long conversion can be skipped after an AND with a small bitmask.
12916 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12917 %{
12918 predicate(VM_Version::supports_bmi2());
12919 ins_cost(125);
12920 effect(TEMP tmp, KILL cr);
12921 match(Set dst (ConvI2L (AndI src mask)));
12922 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12923 ins_encode %{
12924 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12925 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12926 %}
12927 ins_pipe(ialu_reg_reg);
12928 %}
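
// bzhi copies src with all bit positions >= n cleared, n taken from the count
// register. For mask == 2^n - 1, exact_log2(mask + 1) == n; e.g. mask 0x3FF
// gives n == 10 and keeps the low ten bits.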
12929
12930 // And Register with Immediate
12931 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12932 %{
12933 predicate(!UseAPX);
12934 match(Set dst (AndI dst src));
12935 effect(KILL cr);
12936 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12937
12938 format %{ "andl $dst, $src\t# int" %}
12939 ins_encode %{
12940 __ andl($dst$$Register, $src$$constant);
12941 %}
12942 ins_pipe(ialu_reg);
12943 %}
12944
12945 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12946 %{
12947 predicate(UseAPX);
12948 match(Set dst (AndI src1 src2));
12949 effect(KILL cr);
12950 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12951
12952 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12953 ins_encode %{
12954 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12955 %}
12956 ins_pipe(ialu_reg);
12957 %}
12958
12959 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12960 %{
12961 predicate(UseAPX);
12962 match(Set dst (AndI (LoadI src1) src2));
12963 effect(KILL cr);
12964 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12965
12966 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12967 ins_encode %{
12968 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12969 %}
12970 ins_pipe(ialu_reg);
12971 %}
12972
12973 // And Register with Memory
12974 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12975 %{
12976 predicate(!UseAPX);
12977 match(Set dst (AndI dst (LoadI src)));
12978 effect(KILL cr);
12979 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12980
12981 ins_cost(150);
12982 format %{ "andl $dst, $src\t# int" %}
12983 ins_encode %{
12984 __ andl($dst$$Register, $src$$Address);
12985 %}
12986 ins_pipe(ialu_reg_mem);
12987 %}
12988
12989 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12990 %{
12991 predicate(UseAPX);
12992 match(Set dst (AndI src1 (LoadI src2)));
12993 effect(KILL cr);
12994 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12995
12996 ins_cost(150);
12997 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12998 ins_encode %{
12999 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13000 %}
13001 ins_pipe(ialu_reg_mem);
13002 %}
13003
13004 // And Memory with Register
13005 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13006 %{
13007 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13008 effect(KILL cr);
13009 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13010
13011 ins_cost(150);
13012 format %{ "andb $dst, $src\t# byte" %}
13013 ins_encode %{
13014 __ andb($dst$$Address, $src$$Register);
13015 %}
13016 ins_pipe(ialu_mem_reg);
13017 %}
13018
13019 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13020 %{
13021 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13022 effect(KILL cr);
13023 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13024
13025 ins_cost(150);
13026 format %{ "andl $dst, $src\t# int" %}
13027 ins_encode %{
13028 __ andl($dst$$Address, $src$$Register);
13029 %}
13030 ins_pipe(ialu_mem_reg);
13031 %}
13032
13033 // And Memory with Immediate
13034 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13035 %{
13036 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13037 effect(KILL cr);
13038 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13039
13040 ins_cost(125);
13041 format %{ "andl $dst, $src\t# int" %}
13042 ins_encode %{
13043 __ andl($dst$$Address, $src$$constant);
13044 %}
13045 ins_pipe(ialu_mem_imm);
13046 %}
13047
13048 // BMI1 instructions
13049 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13050 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13051 predicate(UseBMI1Instructions);
13052 effect(KILL cr);
13053 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13054
13055 ins_cost(125);
13056 format %{ "andnl $dst, $src1, $src2" %}
13057
13058 ins_encode %{
13059 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13060 %}
13061 ins_pipe(ialu_reg_mem);
13062 %}
13063
13064 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13065 match(Set dst (AndI (XorI src1 minus_1) src2));
13066 predicate(UseBMI1Instructions);
13067 effect(KILL cr);
13068 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13069
13070 format %{ "andnl $dst, $src1, $src2" %}
13071
13072 ins_encode %{
13073 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13074 %}
13075 ins_pipe(ialu_reg);
13076 %}
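
// andn computes ~src1 & src2 in a single instruction; the matched shape
// (XorI src1 minus_1) is how ~x & y appears in the ideal graph.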
13077
13078 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13079 match(Set dst (AndI (SubI imm_zero src) src));
13080 predicate(UseBMI1Instructions);
13081 effect(KILL cr);
13082 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13083
13084 format %{ "blsil $dst, $src" %}
13085
13086 ins_encode %{
13087 __ blsil($dst$$Register, $src$$Register);
13088 %}
13089 ins_pipe(ialu_reg);
13090 %}
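
// blsi isolates the lowest set bit: dst = src & -src, e.g. 0b10100 -> 0b00100.
// Integer.lowestOneBit(x), defined as x & -x, matches this pattern.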
13091
13092 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13093 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13094 predicate(UseBMI1Instructions);
13095 effect(KILL cr);
13096 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13097
13098 ins_cost(125);
13099 format %{ "blsil $dst, $src" %}
13100
13101 ins_encode %{
13102 __ blsil($dst$$Register, $src$$Address);
13103 %}
13104 ins_pipe(ialu_reg_mem);
13105 %}
13106
13107 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13108 %{
13109 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13110 predicate(UseBMI1Instructions);
13111 effect(KILL cr);
13112 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13113
13114 ins_cost(125);
13115 format %{ "blsmskl $dst, $src" %}
13116
13117 ins_encode %{
13118 __ blsmskl($dst$$Register, $src$$Address);
13119 %}
13120 ins_pipe(ialu_reg_mem);
13121 %}
13122
13123 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13124 %{
13125 match(Set dst (XorI (AddI src minus_1) src));
13126 predicate(UseBMI1Instructions);
13127 effect(KILL cr);
13128 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13129
13130 format %{ "blsmskl $dst, $src" %}
13131
13132 ins_encode %{
13133 __ blsmskl($dst$$Register, $src$$Register);
13134 %}
13135
13136 ins_pipe(ialu_reg);
13137 %}
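
// blsmsk sets every bit up to and including the lowest set bit of src:
// dst = src ^ (src - 1), e.g. 0b10100 -> 0b00111.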
13138
13139 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13140 %{
13141 match(Set dst (AndI (AddI src minus_1) src) );
13142 predicate(UseBMI1Instructions);
13143 effect(KILL cr);
13144 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13145
13146 format %{ "blsrl $dst, $src" %}
13147
13148 ins_encode %{
13149 __ blsrl($dst$$Register, $src$$Register);
13150 %}
13151
  ins_pipe(ialu_reg);
13153 %}
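
// blsr clears the lowest set bit: dst = src & (src - 1),
// e.g. 0b10100 -> 0b10000.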
13154
13155 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13156 %{
13157 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13158 predicate(UseBMI1Instructions);
13159 effect(KILL cr);
13160 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13161
13162 ins_cost(125);
13163 format %{ "blsrl $dst, $src" %}
13164
13165 ins_encode %{
13166 __ blsrl($dst$$Register, $src$$Address);
13167 %}
13168
  ins_pipe(ialu_reg_mem);
13170 %}
13171
13172 // Or Instructions
13173 // Or Register with Register
13174 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13175 %{
13176 predicate(!UseAPX);
13177 match(Set dst (OrI dst src));
13178 effect(KILL cr);
13179 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13180
13181 format %{ "orl $dst, $src\t# int" %}
13182 ins_encode %{
13183 __ orl($dst$$Register, $src$$Register);
13184 %}
13185 ins_pipe(ialu_reg_reg);
13186 %}
13187
13188 // Or Register with Register using New Data Destination (NDD)
13189 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13190 %{
13191 predicate(UseAPX);
13192 match(Set dst (OrI src1 src2));
13193 effect(KILL cr);
13194 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13195
13196 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13197 ins_encode %{
13198 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13199 %}
13200 ins_pipe(ialu_reg_reg);
13201 %}
13202
13203 // Or Register with Immediate
13204 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13205 %{
13206 predicate(!UseAPX);
13207 match(Set dst (OrI dst src));
13208 effect(KILL cr);
13209 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13210
13211 format %{ "orl $dst, $src\t# int" %}
13212 ins_encode %{
13213 __ orl($dst$$Register, $src$$constant);
13214 %}
13215 ins_pipe(ialu_reg);
13216 %}
13217
13218 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13219 %{
13220 predicate(UseAPX);
13221 match(Set dst (OrI src1 src2));
13222 effect(KILL cr);
13223 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13224
13225 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13226 ins_encode %{
13227 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13228 %}
13229 ins_pipe(ialu_reg);
13230 %}
13231
13232 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13233 %{
13234 predicate(UseAPX);
13235 match(Set dst (OrI src1 src2));
13236 effect(KILL cr);
13237 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13238
13239 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13240 ins_encode %{
13241 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13242 %}
13243 ins_pipe(ialu_reg);
13244 %}
13245
13246 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13247 %{
13248 predicate(UseAPX);
13249 match(Set dst (OrI (LoadI src1) src2));
13250 effect(KILL cr);
13251 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13252
13253 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13254 ins_encode %{
13255 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13256 %}
13257 ins_pipe(ialu_reg);
13258 %}
13259
13260 // Or Register with Memory
13261 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13262 %{
13263 predicate(!UseAPX);
13264 match(Set dst (OrI dst (LoadI src)));
13265 effect(KILL cr);
13266 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13267
13268 ins_cost(150);
13269 format %{ "orl $dst, $src\t# int" %}
13270 ins_encode %{
13271 __ orl($dst$$Register, $src$$Address);
13272 %}
13273 ins_pipe(ialu_reg_mem);
13274 %}
13275
13276 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13277 %{
13278 predicate(UseAPX);
13279 match(Set dst (OrI src1 (LoadI src2)));
13280 effect(KILL cr);
13281 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13282
13283 ins_cost(150);
13284 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13285 ins_encode %{
13286 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13287 %}
13288 ins_pipe(ialu_reg_mem);
13289 %}
13290
13291 // Or Memory with Register
13292 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13293 %{
13294 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13295 effect(KILL cr);
13296 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13297
13298 ins_cost(150);
13299 format %{ "orb $dst, $src\t# byte" %}
13300 ins_encode %{
13301 __ orb($dst$$Address, $src$$Register);
13302 %}
13303 ins_pipe(ialu_mem_reg);
13304 %}
13305
13306 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13307 %{
13308 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13309 effect(KILL cr);
13310 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13311
13312 ins_cost(150);
13313 format %{ "orl $dst, $src\t# int" %}
13314 ins_encode %{
13315 __ orl($dst$$Address, $src$$Register);
13316 %}
13317 ins_pipe(ialu_mem_reg);
13318 %}
13319
13320 // Or Memory with Immediate
13321 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13322 %{
13323 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13324 effect(KILL cr);
13325 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13326
13327 ins_cost(125);
13328 format %{ "orl $dst, $src\t# int" %}
13329 ins_encode %{
13330 __ orl($dst$$Address, $src$$constant);
13331 %}
13332 ins_pipe(ialu_mem_imm);
13333 %}
13334
13335 // Xor Instructions
13336 // Xor Register with Register
13337 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13338 %{
13339 predicate(!UseAPX);
13340 match(Set dst (XorI dst src));
13341 effect(KILL cr);
13342 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13343
13344 format %{ "xorl $dst, $src\t# int" %}
13345 ins_encode %{
13346 __ xorl($dst$$Register, $src$$Register);
13347 %}
13348 ins_pipe(ialu_reg_reg);
13349 %}
13350
13351 // Xor Register with Register using New Data Destination (NDD)
13352 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13353 %{
13354 predicate(UseAPX);
13355 match(Set dst (XorI src1 src2));
13356 effect(KILL cr);
13357 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13358
13359 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13360 ins_encode %{
13361 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13362 %}
13363 ins_pipe(ialu_reg_reg);
13364 %}
13365
13366 // Xor Register with Immediate -1
13367 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13368 %{
13369 predicate(!UseAPX);
13370 match(Set dst (XorI dst imm));
13371
13372 format %{ "notl $dst" %}
13373 ins_encode %{
13374 __ notl($dst$$Register);
13375 %}
13376 ins_pipe(ialu_reg);
13377 %}
13378
13379 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13380 %{
13381 match(Set dst (XorI src imm));
13382 predicate(UseAPX);
13383
13384 format %{ "enotl $dst, $src" %}
13385 ins_encode %{
13386 __ enotl($dst$$Register, $src$$Register);
13387 %}
13388 ins_pipe(ialu_reg);
13389 %}
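
// x ^ -1 is ~x in two's complement, so the two rules above match Java's ~
// operator into not/enotl, which leaves the flags alone (hence no KILL cr).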
13390
13391 // Xor Register with Immediate
13392 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13393 %{
  // The strict predicate excludes -1 so that xorI_rReg_im1 is selected for that value regardless of cost.
13395 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13396 match(Set dst (XorI dst src));
13397 effect(KILL cr);
13398 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13399
13400 format %{ "xorl $dst, $src\t# int" %}
13401 ins_encode %{
13402 __ xorl($dst$$Register, $src$$constant);
13403 %}
13404 ins_pipe(ialu_reg);
13405 %}
13406
13407 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13408 %{
  // The strict predicate excludes -1 so that xorI_rReg_im1_ndd is selected for that value regardless of cost.
13410 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13411 match(Set dst (XorI src1 src2));
13412 effect(KILL cr);
13413 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13414
13415 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13416 ins_encode %{
13417 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13418 %}
13419 ins_pipe(ialu_reg);
13420 %}
13421
13422 // Xor Memory with Immediate
13423 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13424 %{
13425 predicate(UseAPX);
13426 match(Set dst (XorI (LoadI src1) src2));
13427 effect(KILL cr);
13428 ins_cost(150);
13429 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13430
13431 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13432 ins_encode %{
13433 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13434 %}
13435 ins_pipe(ialu_reg);
13436 %}
13437
13438 // Xor Register with Memory
13439 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13440 %{
13441 predicate(!UseAPX);
13442 match(Set dst (XorI dst (LoadI src)));
13443 effect(KILL cr);
13444 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13445
13446 ins_cost(150);
13447 format %{ "xorl $dst, $src\t# int" %}
13448 ins_encode %{
13449 __ xorl($dst$$Register, $src$$Address);
13450 %}
13451 ins_pipe(ialu_reg_mem);
13452 %}
13453
13454 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13455 %{
13456 predicate(UseAPX);
13457 match(Set dst (XorI src1 (LoadI src2)));
13458 effect(KILL cr);
13459 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13460
13461 ins_cost(150);
13462 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13463 ins_encode %{
13464 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13465 %}
13466 ins_pipe(ialu_reg_mem);
13467 %}
13468
13469 // Xor Memory with Register
13470 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13471 %{
13472 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13473 effect(KILL cr);
13474 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13475
13476 ins_cost(150);
13477 format %{ "xorb $dst, $src\t# byte" %}
13478 ins_encode %{
13479 __ xorb($dst$$Address, $src$$Register);
13480 %}
13481 ins_pipe(ialu_mem_reg);
13482 %}
13483
13484 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13485 %{
13486 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13487 effect(KILL cr);
13488 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13489
13490 ins_cost(150);
13491 format %{ "xorl $dst, $src\t# int" %}
13492 ins_encode %{
13493 __ xorl($dst$$Address, $src$$Register);
13494 %}
13495 ins_pipe(ialu_mem_reg);
13496 %}
13497
13498 // Xor Memory with Immediate
13499 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13500 %{
13501 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13502 effect(KILL cr);
13503 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13504
13505 ins_cost(125);
13506 format %{ "xorl $dst, $src\t# int" %}
13507 ins_encode %{
13508 __ xorl($dst$$Address, $src$$constant);
13509 %}
13510 ins_pipe(ialu_mem_imm);
13511 %}
13512
13513
13514 // Long Logical Instructions
13515
13516 // And Instructions
13517 // And Register with Register
13518 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13519 %{
13520 predicate(!UseAPX);
13521 match(Set dst (AndL dst src));
13522 effect(KILL cr);
13523 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13524
13525 format %{ "andq $dst, $src\t# long" %}
13526 ins_encode %{
13527 __ andq($dst$$Register, $src$$Register);
13528 %}
13529 ins_pipe(ialu_reg_reg);
13530 %}
13531
13532 // And Register with Register using New Data Destination (NDD)
13533 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13534 %{
13535 predicate(UseAPX);
13536 match(Set dst (AndL src1 src2));
13537 effect(KILL cr);
13538 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13539
13540 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13541 ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13545 ins_pipe(ialu_reg_reg);
13546 %}
13547
13548 // And Register with Immediate 255
13549 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13550 %{
13551 match(Set dst (AndL src mask));
13552
13553 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13554 ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13556 __ movzbl($dst$$Register, $src$$Register);
13557 %}
13558 ins_pipe(ialu_reg);
13559 %}
13560
13561 // And Register with Immediate 65535
13562 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13563 %{
13564 match(Set dst (AndL src mask));
13565
13566 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13567 ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13569 __ movzwl($dst$$Register, $src$$Register);
13570 %}
13571 ins_pipe(ialu_reg);
13572 %}
13573
13574 // And Register with Immediate
13575 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13576 %{
13577 predicate(!UseAPX);
13578 match(Set dst (AndL dst src));
13579 effect(KILL cr);
13580 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13581
13582 format %{ "andq $dst, $src\t# long" %}
13583 ins_encode %{
13584 __ andq($dst$$Register, $src$$constant);
13585 %}
13586 ins_pipe(ialu_reg);
13587 %}
13588
13589 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13590 %{
13591 predicate(UseAPX);
13592 match(Set dst (AndL src1 src2));
13593 effect(KILL cr);
13594 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13595
13596 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13597 ins_encode %{
13598 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13599 %}
13600 ins_pipe(ialu_reg);
13601 %}
13602
13603 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13604 %{
13605 predicate(UseAPX);
13606 match(Set dst (AndL (LoadL src1) src2));
13607 effect(KILL cr);
13608 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13609
13610 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13611 ins_encode %{
13612 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13613 %}
13614 ins_pipe(ialu_reg);
13615 %}
13616
13617 // And Register with Memory
13618 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13619 %{
13620 predicate(!UseAPX);
13621 match(Set dst (AndL dst (LoadL src)));
13622 effect(KILL cr);
13623 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13624
13625 ins_cost(150);
13626 format %{ "andq $dst, $src\t# long" %}
13627 ins_encode %{
13628 __ andq($dst$$Register, $src$$Address);
13629 %}
13630 ins_pipe(ialu_reg_mem);
13631 %}
13632
13633 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13634 %{
13635 predicate(UseAPX);
13636 match(Set dst (AndL src1 (LoadL src2)));
13637 effect(KILL cr);
13638 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13639
13640 ins_cost(150);
13641 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13642 ins_encode %{
13643 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13644 %}
13645 ins_pipe(ialu_reg_mem);
13646 %}
13647
13648 // And Memory with Register
13649 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13650 %{
13651 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13652 effect(KILL cr);
13653 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13654
13655 ins_cost(150);
13656 format %{ "andq $dst, $src\t# long" %}
13657 ins_encode %{
13658 __ andq($dst$$Address, $src$$Register);
13659 %}
13660 ins_pipe(ialu_mem_reg);
13661 %}
13662
13663 // And Memory with Immediate
13664 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13665 %{
13666 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13667 effect(KILL cr);
13668 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13669
13670 ins_cost(125);
13671 format %{ "andq $dst, $src\t# long" %}
13672 ins_encode %{
13673 __ andq($dst$$Address, $src$$constant);
13674 %}
13675 ins_pipe(ialu_mem_imm);
13676 %}
13677
13678 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13679 %{
  // con should be a pure 64-bit immediate whose complement is a power of 2
  // (the AND clears exactly one bit); for values that fit in an 8/32-bit
  // immediate, plain AND/OR works just as well.
13682 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13683
13684 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13685 effect(KILL cr);
13686
13687 ins_cost(125);
13688 format %{ "btrq $dst, log2(not($con))\t# long" %}
13689 ins_encode %{
13690 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13691 %}
13692 ins_pipe(ialu_mem_imm);
13693 %}
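
// Illustrative example: con = ~(1L << 40) clears bit 40, so this rule emits
// btrq $dst, 40 instead of materializing the 64-bit mask for an andq.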
13694
13695 // BMI1 instructions
13696 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13697 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13698 predicate(UseBMI1Instructions);
13699 effect(KILL cr);
13700 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13701
13702 ins_cost(125);
13703 format %{ "andnq $dst, $src1, $src2" %}
13704
13705 ins_encode %{
13706 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13707 %}
13708 ins_pipe(ialu_reg_mem);
13709 %}
13710
13711 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13712 match(Set dst (AndL (XorL src1 minus_1) src2));
13713 predicate(UseBMI1Instructions);
13714 effect(KILL cr);
13715 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13716
13717 format %{ "andnq $dst, $src1, $src2" %}
13718
13719 ins_encode %{
13720 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13721 %}
  ins_pipe(ialu_reg);
13723 %}
13724
13725 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13726 match(Set dst (AndL (SubL imm_zero src) src));
13727 predicate(UseBMI1Instructions);
13728 effect(KILL cr);
13729 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13730
13731 format %{ "blsiq $dst, $src" %}
13732
13733 ins_encode %{
13734 __ blsiq($dst$$Register, $src$$Register);
13735 %}
13736 ins_pipe(ialu_reg);
13737 %}
13738
13739 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13740 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13741 predicate(UseBMI1Instructions);
13742 effect(KILL cr);
13743 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13744
13745 ins_cost(125);
13746 format %{ "blsiq $dst, $src" %}
13747
13748 ins_encode %{
13749 __ blsiq($dst$$Register, $src$$Address);
13750 %}
13751 ins_pipe(ialu_reg_mem);
13752 %}
13753
13754 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13755 %{
13756 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13757 predicate(UseBMI1Instructions);
13758 effect(KILL cr);
13759 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13760
13761 ins_cost(125);
13762 format %{ "blsmskq $dst, $src" %}
13763
13764 ins_encode %{
13765 __ blsmskq($dst$$Register, $src$$Address);
13766 %}
13767 ins_pipe(ialu_reg_mem);
13768 %}
13769
13770 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13771 %{
13772 match(Set dst (XorL (AddL src minus_1) src));
13773 predicate(UseBMI1Instructions);
13774 effect(KILL cr);
13775 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13776
13777 format %{ "blsmskq $dst, $src" %}
13778
13779 ins_encode %{
13780 __ blsmskq($dst$$Register, $src$$Register);
13781 %}
13782
13783 ins_pipe(ialu_reg);
13784 %}
13785
13786 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13787 %{
13788 match(Set dst (AndL (AddL src minus_1) src) );
13789 predicate(UseBMI1Instructions);
13790 effect(KILL cr);
13791 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13792
13793 format %{ "blsrq $dst, $src" %}
13794
13795 ins_encode %{
13796 __ blsrq($dst$$Register, $src$$Register);
13797 %}
13798
13799 ins_pipe(ialu_reg);
13800 %}
13801
13802 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13803 %{
13804 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)));
13805 predicate(UseBMI1Instructions);
13806 effect(KILL cr);
13807 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13808
13809 ins_cost(125);
13810 format %{ "blsrq $dst, $src" %}
13811
13812 ins_encode %{
13813 __ blsrq($dst$$Register, $src$$Address);
13814 %}
13815
13816 ins_pipe(ialu_reg_mem);
13817 %}
13818
13819 // Or Instructions
13820 // Or Register with Register
13821 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13822 %{
13823 predicate(!UseAPX);
13824 match(Set dst (OrL dst src));
13825 effect(KILL cr);
13826 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13827
13828 format %{ "orq $dst, $src\t# long" %}
13829 ins_encode %{
13830 __ orq($dst$$Register, $src$$Register);
13831 %}
13832 ins_pipe(ialu_reg_reg);
13833 %}
13834
13835 // Or Register with Register using New Data Destination (NDD)
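// APX NDD forms use an EVEX encoding with a separate destination, so
// "eorq dst, src1, src2" computes dst = src1 | src2 without first copying
// src1 into dst. The trailing boolean on the masm calls is assumed here to
// select the EVEX.NF (no-flags) variant; it is false in these rules because
// they advertise flag results via flag(...).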
13836 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13837 %{
13838 predicate(UseAPX);
13839 match(Set dst (OrL src1 src2));
13840 effect(KILL cr);
13841 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13842
13843 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13844 ins_encode %{
13845 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13847 %}
13848 ins_pipe(ialu_reg_reg);
13849 %}
13850
13851 // Use any_RegP to match R15 (TLS register) without spilling.
13852 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
predicate(!UseAPX);
13853 match(Set dst (OrL dst (CastP2X src)));
13854 effect(KILL cr);
13855 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13856
13857 format %{ "orq $dst, $src\t# long" %}
13858 ins_encode %{
13859 __ orq($dst$$Register, $src$$Register);
13860 %}
13861 ins_pipe(ialu_reg_reg);
13862 %}
13863
13864 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
predicate(UseAPX);
13865 match(Set dst (OrL src1 (CastP2X src2)));
13866 effect(KILL cr);
13867 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13868
13869 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13870 ins_encode %{
13871 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13872 %}
13873 ins_pipe(ialu_reg_reg);
13874 %}
13875
13876 // Or Register with Immediate
13877 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13878 %{
13879 predicate(!UseAPX);
13880 match(Set dst (OrL dst src));
13881 effect(KILL cr);
13882 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13883
13884 format %{ "orq $dst, $src\t# long" %}
13885 ins_encode %{
13886 __ orq($dst$$Register, $src$$constant);
13887 %}
13888 ins_pipe(ialu_reg);
13889 %}
13890
13891 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13892 %{
13893 predicate(UseAPX);
13894 match(Set dst (OrL src1 src2));
13895 effect(KILL cr);
13896 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13897
13898 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13899 ins_encode %{
13900 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13901 %}
13902 ins_pipe(ialu_reg);
13903 %}
13904
13905 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13906 %{
13907 predicate(UseAPX);
13908 match(Set dst (OrL src1 src2));
13909 effect(KILL cr);
13910 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13911
13912 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13913 ins_encode %{
13914 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13915 %}
13916 ins_pipe(ialu_reg);
13917 %}
13918
13919 // Or Memory with Immediate into Register (NDD)
13920 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13921 %{
13922 predicate(UseAPX);
13923 match(Set dst (OrL (LoadL src1) src2));
13924 effect(KILL cr);
13925 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13926
13927 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13928 ins_encode %{
13929 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13930 %}
13931 ins_pipe(ialu_reg);
13932 %}
13933
13934 // Or Register with Memory
13935 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13936 %{
13937 predicate(!UseAPX);
13938 match(Set dst (OrL dst (LoadL src)));
13939 effect(KILL cr);
13940 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13941
13942 ins_cost(150);
13943 format %{ "orq $dst, $src\t# long" %}
13944 ins_encode %{
13945 __ orq($dst$$Register, $src$$Address);
13946 %}
13947 ins_pipe(ialu_reg_mem);
13948 %}
13949
13950 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13951 %{
13952 predicate(UseAPX);
13953 match(Set dst (OrL src1 (LoadL src2)));
13954 effect(KILL cr);
13955 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13956
13957 ins_cost(150);
13958 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13959 ins_encode %{
13960 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13961 %}
13962 ins_pipe(ialu_reg_mem);
13963 %}
13964
13965 // Or Memory with Register
13966 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13967 %{
13968 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13969 effect(KILL cr);
13970 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13971
13972 ins_cost(150);
13973 format %{ "orq $dst, $src\t# long" %}
13974 ins_encode %{
13975 __ orq($dst$$Address, $src$$Register);
13976 %}
13977 ins_pipe(ialu_mem_reg);
13978 %}
13979
13980 // Or Memory with Immediate
13981 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13982 %{
13983 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13984 effect(KILL cr);
13985 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986
13987 ins_cost(125);
13988 format %{ "orq $dst, $src\t# long" %}
13989 ins_encode %{
13990 __ orq($dst$$Address, $src$$constant);
13991 %}
13992 ins_pipe(ialu_mem_imm);
13993 %}
13994
13995 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13996 %{
13997 // con should be a pure 64-bit power of 2 immediate
13998 // because AND/OR works well enough for 8/32-bit values.
13999 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14000
14001 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14002 effect(KILL cr);
14003
14004 ins_cost(125);
14005 format %{ "btsq $dst, log2($con)\t# long" %}
14006 ins_encode %{
14007 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14008 %}
14009 ins_pipe(ialu_mem_imm);
14010 %}
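// Illustrative effect of the predicate above (a sketch): a constant such as
// (1L << 40) cannot be encoded as the sign-extended 32-bit immediate that
// "orq [mem], imm" accepts, so it would otherwise need a temporary register;
// "btsq [mem], 40" sets that single bit directly. Single-bit constants that
// do fit a 32-bit immediate are left to the plain OR rules, hence the > 31 guard.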
14011
14012 // Xor Instructions
14013 // Xor Register with Register
14014 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14015 %{
14016 predicate(!UseAPX);
14017 match(Set dst (XorL dst src));
14018 effect(KILL cr);
14019 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14020
14021 format %{ "xorq $dst, $src\t# long" %}
14022 ins_encode %{
14023 __ xorq($dst$$Register, $src$$Register);
14024 %}
14025 ins_pipe(ialu_reg_reg);
14026 %}
14027
14028 // Xor Register with Register using New Data Destination (NDD)
14029 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14030 %{
14031 predicate(UseAPX);
14032 match(Set dst (XorL src1 src2));
14033 effect(KILL cr);
14034 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14035
14036 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14037 ins_encode %{
14038 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14039 %}
14040 ins_pipe(ialu_reg_reg);
14041 %}
14042
14043 // Xor Register with Immediate -1
14044 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14045 %{
14046 predicate(!UseAPX);
14047 match(Set dst (XorL dst imm));
14048
14049 format %{ "notq $dst" %}
14050 ins_encode %{
14051 __ notq($dst$$Register);
14052 %}
14053 ins_pipe(ialu_reg);
14054 %}
14055
14056 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14057 %{
14058 predicate(UseAPX);
14059 match(Set dst (XorL src imm));
14060
14061 format %{ "enotq $dst, $src" %}
14062 ins_encode %{
14063 __ enotq($dst$$Register, $src$$Register);
14064 %}
14065 ins_pipe(ialu_reg);
14066 %}
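// Note: x ^ -1 == ~x, so the two rules above strength-reduce a full-width
// XOR against -1 into not/enot. NOT does not modify any flags, which is why
// these rules omit the KILL cr effect that the other logic rules carry.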
14067
14068 // Xor Register with Immediate
14069 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14070 %{
14071 // Strict predicate check so that xorL_rReg_im1 is always selected when immL32 src is -1, regardless of cost.
14072 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14073 match(Set dst (XorL dst src));
14074 effect(KILL cr);
14075 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14076
14077 format %{ "xorq $dst, $src\t# long" %}
14078 ins_encode %{
14079 __ xorq($dst$$Register, $src$$constant);
14080 %}
14081 ins_pipe(ialu_reg);
14082 %}
14083
14084 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14085 %{
14086 // Strict predicate check so that xorL_rReg_im1_ndd is always selected when immL32 src2 is -1, regardless of cost.
14087 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14088 match(Set dst (XorL src1 src2));
14089 effect(KILL cr);
14090 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14091
14092 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14093 ins_encode %{
14094 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14095 %}
14096 ins_pipe(ialu_reg);
14097 %}
14098
14099 // Xor Memory with Immediate into Register (NDD)
14100 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14101 %{
14102 predicate(UseAPX);
14103 match(Set dst (XorL (LoadL src1) src2));
14104 effect(KILL cr);
14105 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14106 ins_cost(150);
14107
14108 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14109 ins_encode %{
14110 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14111 %}
14112 ins_pipe(ialu_reg);
14113 %}
14114
14115 // Xor Register with Memory
14116 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14117 %{
14118 predicate(!UseAPX);
14119 match(Set dst (XorL dst (LoadL src)));
14120 effect(KILL cr);
14121 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14122
14123 ins_cost(150);
14124 format %{ "xorq $dst, $src\t# long" %}
14125 ins_encode %{
14126 __ xorq($dst$$Register, $src$$Address);
14127 %}
14128 ins_pipe(ialu_reg_mem);
14129 %}
14130
14131 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14132 %{
14133 predicate(UseAPX);
14134 match(Set dst (XorL src1 (LoadL src2)));
14135 effect(KILL cr);
14136 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14137
14138 ins_cost(150);
14139 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14140 ins_encode %{
14141 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14142 %}
14143 ins_pipe(ialu_reg_mem);
14144 %}
14145
14146 // Xor Memory with Register
14147 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14148 %{
14149 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14150 effect(KILL cr);
14151 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14152
14153 ins_cost(150);
14154 format %{ "xorq $dst, $src\t# long" %}
14155 ins_encode %{
14156 __ xorq($dst$$Address, $src$$Register);
14157 %}
14158 ins_pipe(ialu_mem_reg);
14159 %}
14160
14161 // Xor Memory with Immediate
14162 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14163 %{
14164 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14165 effect(KILL cr);
14166 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14167
14168 ins_cost(125);
14169 format %{ "xorq $dst, $src\t# long" %}
14170 ins_encode %{
14171 __ xorq($dst$$Address, $src$$constant);
14172 %}
14173 ins_pipe(ialu_mem_imm);
14174 %}
14175
14176 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14177 %{
14178 match(Set dst (CmpLTMask p q));
14179 effect(KILL cr);
14180
14181 ins_cost(400);
14182 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14183 "setcc $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
14184 "negl $dst" %}
14185 ins_encode %{
14186 __ cmpl($p$$Register, $q$$Register);
14187 __ setcc(Assembler::less, $dst$$Register);
14188 __ negl($dst$$Register);
14189 %}
14190 ins_pipe(pipe_slow);
14191 %}
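// cmpLTMask computes a branchless all-ones/all-zero mask, roughly
// dst = (p < q) ? -1 : 0 under a signed compare (a sketch of the semantics).
// The specialized rule below handles q == 0, where a single arithmetic
// right shift by 31 spreads the sign bit across the register.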
14192
14193 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14194 %{
14195 match(Set dst (CmpLTMask dst zero));
14196 effect(KILL cr);
14197
14198 ins_cost(100);
14199 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14200 ins_encode %{
14201 __ sarl($dst$$Register, 31);
14202 %}
14203 ins_pipe(ialu_reg);
14204 %}
14205
14206 // Better to save a register than to avoid a branch.
14207 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14208 %{
14209 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14210 effect(KILL cr);
14211 ins_cost(300);
14212 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14213 "jge done\n\t"
14214 "addl $p,$y\n"
14215 "done: " %}
14216 ins_encode %{
14217 Register Rp = $p$$Register;
14218 Register Rq = $q$$Register;
14219 Register Ry = $y$$Register;
14220 Label done;
14221 __ subl(Rp, Rq);
14222 __ jccb(Assembler::greaterEqual, done);
14223 __ addl(Rp, Ry);
14224 __ bind(done);
14225 %}
14226 ins_pipe(pipe_cmplt);
14227 %}
14228
14229 // Better to save a register than to avoid a branch.
14230 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14231 %{
14232 match(Set y (AndI (CmpLTMask p q) y));
14233 effect(KILL cr);
14234
14235 ins_cost(300);
14236
14237 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14238 "jl done\n\t"
14239 "xorl $y, $y\n"
14240 "done: " %}
14241 ins_encode %{
14242 Register Rp = $p$$Register;
14243 Register Rq = $q$$Register;
14244 Register Ry = $y$$Register;
14245 Label done;
14246 __ cmpl(Rp, Rq);
14247 __ jccb(Assembler::less, done);
14248 __ xorl(Ry, Ry);
14249 __ bind(done);
14250 %}
14251 ins_pipe(pipe_cmplt);
14252 %}
14253
14254
14255 //---------- FP Instructions------------------------------------------------
14256
14257 // Really expensive, avoid
14258 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14259 %{
14260 match(Set cr (CmpF src1 src2));
14261
14262 ins_cost(500);
14263 format %{ "ucomiss $src1, $src2\n\t"
14264 "jnp,s exit\n\t"
14265 "pushfq\t# saw NaN, set CF\n\t"
14266 "andq [rsp], #0xffffff2b\n\t"
14267 "popfq\n"
14268 "exit:" %}
14269 ins_encode %{
14270 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14271 emit_cmpfp_fixup(masm);
14272 %}
14273 ins_pipe(pipe_slow);
14274 %}
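// On an unordered compare (either input NaN) ucomiss/ucomisd set ZF, PF and
// CF all to 1. The fixup shown above is taken only when PF indicates an
// unordered result; masking EFLAGS with 0xffffff2b clears SF/ZF/AF/PF while
// leaving CF set, so NaN comparisons fall through as "below" (a sketch of
// what emit_cmpfp_fixup arranges; see the shared macro assembler for the
// authoritative sequence).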
14275
14276 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14277 match(Set cr (CmpF src1 src2));
14278
14279 ins_cost(100);
14280 format %{ "ucomiss $src1, $src2" %}
14281 ins_encode %{
14282 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14283 %}
14284 ins_pipe(pipe_slow);
14285 %}
14286
14287 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14288 match(Set cr (CmpF src1 (LoadF src2)));
14289
14290 ins_cost(100);
14291 format %{ "ucomiss $src1, $src2" %}
14292 ins_encode %{
14293 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14294 %}
14295 ins_pipe(pipe_slow);
14296 %}
14297
14298 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14299 match(Set cr (CmpF src con));
14300 ins_cost(100);
14301 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14302 ins_encode %{
14303 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14304 %}
14305 ins_pipe(pipe_slow);
14306 %}
14307
14308 // Really expensive, avoid
14309 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14310 %{
14311 match(Set cr (CmpD src1 src2));
14312
14313 ins_cost(500);
14314 format %{ "ucomisd $src1, $src2\n\t"
14315 "jnp,s exit\n\t"
14316 "pushfq\t# saw NaN, set CF\n\t"
14317 "andq [rsp], #0xffffff2b\n\t"
14318 "popfq\n"
14319 "exit:" %}
14320 ins_encode %{
14321 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14322 emit_cmpfp_fixup(masm);
14323 %}
14324 ins_pipe(pipe_slow);
14325 %}
14326
14327 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14328 match(Set cr (CmpD src1 src2));
14329
14330 ins_cost(100);
14331 format %{ "ucomisd $src1, $src2" %}
14332 ins_encode %{
14333 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14334 %}
14335 ins_pipe(pipe_slow);
14336 %}
14337
14338 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14339 match(Set cr (CmpD src1 (LoadD src2)));
14340
14341 ins_cost(100);
14342 format %{ "ucomisd $src1, $src2" %}
14343 ins_encode %{
14344 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14345 %}
14346 ins_pipe(pipe_slow);
14347 %}
14348
14349 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14350 match(Set cr (CmpD src con));
14351 ins_cost(100);
14352 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14353 ins_encode %{
14354 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14355 %}
14356 ins_pipe(pipe_slow);
14357 %}
14358
14359 // Compare into -1,0,1
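// The cmp{F,D}3 rules below implement the fcmpl/dcmpl result convention
// (paraphrasing the formats that follow): start with -1, keep it when the
// compare is unordered (NaN) or "below", otherwise produce 0 for equal and
// 1 for greater via setne plus a zero-extend.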
14360 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14361 %{
14362 match(Set dst (CmpF3 src1 src2));
14363 effect(KILL cr);
14364
14365 ins_cost(275);
14366 format %{ "ucomiss $src1, $src2\n\t"
14367 "movl $dst, #-1\n\t"
14368 "jp,s done\n\t"
14369 "jb,s done\n\t"
14370 "setne $dst\n\t"
14371 "movzbl $dst, $dst\n"
14372 "done:" %}
14373 ins_encode %{
14374 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14375 emit_cmpfp3(masm, $dst$$Register);
14376 %}
14377 ins_pipe(pipe_slow);
14378 %}
14379
14380 // Compare into -1,0,1
14381 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14382 %{
14383 match(Set dst (CmpF3 src1 (LoadF src2)));
14384 effect(KILL cr);
14385
14386 ins_cost(275);
14387 format %{ "ucomiss $src1, $src2\n\t"
14388 "movl $dst, #-1\n\t"
14389 "jp,s done\n\t"
14390 "jb,s done\n\t"
14391 "setne $dst\n\t"
14392 "movzbl $dst, $dst\n"
14393 "done:" %}
14394 ins_encode %{
14395 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14396 emit_cmpfp3(masm, $dst$$Register);
14397 %}
14398 ins_pipe(pipe_slow);
14399 %}
14400
14401 // Compare into -1,0,1
14402 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14403 match(Set dst (CmpF3 src con));
14404 effect(KILL cr);
14405
14406 ins_cost(275);
14407 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14408 "movl $dst, #-1\n\t"
14409 "jp,s done\n\t"
14410 "jb,s done\n\t"
14411 "setne $dst\n\t"
14412 "movzbl $dst, $dst\n"
14413 "done:" %}
14414 ins_encode %{
14415 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14416 emit_cmpfp3(masm, $dst$$Register);
14417 %}
14418 ins_pipe(pipe_slow);
14419 %}
14420
14421 // Compare into -1,0,1
14422 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14423 %{
14424 match(Set dst (CmpD3 src1 src2));
14425 effect(KILL cr);
14426
14427 ins_cost(275);
14428 format %{ "ucomisd $src1, $src2\n\t"
14429 "movl $dst, #-1\n\t"
14430 "jp,s done\n\t"
14431 "jb,s done\n\t"
14432 "setne $dst\n\t"
14433 "movzbl $dst, $dst\n"
14434 "done:" %}
14435 ins_encode %{
14436 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14437 emit_cmpfp3(masm, $dst$$Register);
14438 %}
14439 ins_pipe(pipe_slow);
14440 %}
14441
14442 // Compare into -1,0,1
14443 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14444 %{
14445 match(Set dst (CmpD3 src1 (LoadD src2)));
14446 effect(KILL cr);
14447
14448 ins_cost(275);
14449 format %{ "ucomisd $src1, $src2\n\t"
14450 "movl $dst, #-1\n\t"
14451 "jp,s done\n\t"
14452 "jb,s done\n\t"
14453 "setne $dst\n\t"
14454 "movzbl $dst, $dst\n"
14455 "done:" %}
14456 ins_encode %{
14457 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14458 emit_cmpfp3(masm, $dst$$Register);
14459 %}
14460 ins_pipe(pipe_slow);
14461 %}
14462
14463 // Compare into -1,0,1
14464 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14465 match(Set dst (CmpD3 src con));
14466 effect(KILL cr);
14467
14468 ins_cost(275);
14469 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14470 "movl $dst, #-1\n\t"
14471 "jp,s done\n\t"
14472 "jb,s done\n\t"
14473 "setne $dst\n\t"
14474 "movzbl $dst, $dst\n"
14475 "done:" %}
14476 ins_encode %{
14477 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14478 emit_cmpfp3(masm, $dst$$Register);
14479 %}
14480 ins_pipe(pipe_slow);
14481 %}
14482
14483 //----------Arithmetic Conversion Instructions---------------------------------
14484
14485 instruct convF2D_reg_reg(regD dst, regF src)
14486 %{
14487 match(Set dst (ConvF2D src));
14488
14489 format %{ "cvtss2sd $dst, $src" %}
14490 ins_encode %{
14491 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14492 %}
14493 ins_pipe(pipe_slow); // XXX
14494 %}
14495
14496 instruct convF2D_reg_mem(regD dst, memory src)
14497 %{
14498 predicate(UseAVX == 0);
14499 match(Set dst (ConvF2D (LoadF src)));
14500
14501 format %{ "cvtss2sd $dst, $src" %}
14502 ins_encode %{
14503 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14504 %}
14505 ins_pipe(pipe_slow); // XXX
14506 %}
14507
14508 instruct convD2F_reg_reg(regF dst, regD src)
14509 %{
14510 match(Set dst (ConvD2F src));
14511
14512 format %{ "cvtsd2ss $dst, $src" %}
14513 ins_encode %{
14514 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14515 %}
14516 ins_pipe(pipe_slow); // XXX
14517 %}
14518
14519 instruct convD2F_reg_mem(regF dst, memory src)
14520 %{
14521 predicate(UseAVX == 0);
14522 match(Set dst (ConvD2F (LoadD src)));
14523
14524 format %{ "cvtsd2ss $dst, $src" %}
14525 ins_encode %{
14526 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14527 %}
14528 ins_pipe(pipe_slow); // XXX
14529 %}
14530
14531 // XXX do mem variants
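// Java semantics for F2I/F2L/D2I/D2L: NaN converts to 0 and out-of-range
// values saturate to MIN/MAX. The cvtt*2si-style instructions instead return
// the "integer indefinite" value (e.g. 0x80000000), so the legacy
// convertF2I helper emits a compare-and-branch fixup, which is why those
// rules KILL cr. The AVX10.2 saturating converts (the evcvtt*sis forms used
// below) provide the required semantics in hardware, so the avx10 rules need
// no fixup and leave the flags alone (a sketch of the rationale).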
14532 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14533 %{
14534 predicate(!VM_Version::supports_avx10_2());
14535 match(Set dst (ConvF2I src));
14536 effect(KILL cr);
14537 format %{ "convert_f2i $dst, $src" %}
14538 ins_encode %{
14539 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14540 %}
14541 ins_pipe(pipe_slow);
14542 %}
14543
14544 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14545 %{
14546 predicate(VM_Version::supports_avx10_2());
14547 match(Set dst (ConvF2I src));
14548 format %{ "evcvttss2sisl $dst, $src" %}
14549 ins_encode %{
14550 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14551 %}
14552 ins_pipe(pipe_slow);
14553 %}
14554
14555 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14556 %{
14557 predicate(VM_Version::supports_avx10_2());
14558 match(Set dst (ConvF2I (LoadF src)));
14559 format %{ "evcvttss2sisl $dst, $src" %}
14560 ins_encode %{
14561 __ evcvttss2sisl($dst$$Register, $src$$Address);
14562 %}
14563 ins_pipe(pipe_slow);
14564 %}
14565
14566 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14567 %{
14568 predicate(!VM_Version::supports_avx10_2());
14569 match(Set dst (ConvF2L src));
14570 effect(KILL cr);
14571 format %{ "convert_f2l $dst, $src"%}
14572 ins_encode %{
14573 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14574 %}
14575 ins_pipe(pipe_slow);
14576 %}
14577
14578 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14579 %{
14580 predicate(VM_Version::supports_avx10_2());
14581 match(Set dst (ConvF2L src));
14582 format %{ "evcvttss2sisq $dst, $src" %}
14583 ins_encode %{
14584 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14585 %}
14586 ins_pipe(pipe_slow);
14587 %}
14588
14589 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14590 %{
14591 predicate(VM_Version::supports_avx10_2());
14592 match(Set dst (ConvF2L (LoadF src)));
14593 format %{ "evcvttss2sisq $dst, $src" %}
14594 ins_encode %{
14595 __ evcvttss2sisq($dst$$Register, $src$$Address);
14596 %}
14597 ins_pipe(pipe_slow);
14598 %}
14599
14600 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14601 %{
14602 predicate(!VM_Version::supports_avx10_2());
14603 match(Set dst (ConvD2I src));
14604 effect(KILL cr);
14605 format %{ "convert_d2i $dst, $src"%}
14606 ins_encode %{
14607 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14608 %}
14609 ins_pipe(pipe_slow);
14610 %}
14611
14612 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14613 %{
14614 predicate(VM_Version::supports_avx10_2());
14615 match(Set dst (ConvD2I src));
14616 format %{ "evcvttsd2sisl $dst, $src" %}
14617 ins_encode %{
14618 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14619 %}
14620 ins_pipe(pipe_slow);
14621 %}
14622
14623 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14624 %{
14625 predicate(VM_Version::supports_avx10_2());
14626 match(Set dst (ConvD2I (LoadD src)));
14627 format %{ "evcvttsd2sisl $dst, $src" %}
14628 ins_encode %{
14629 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14630 %}
14631 ins_pipe(pipe_slow);
14632 %}
14633
14634 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14635 %{
14636 predicate(!VM_Version::supports_avx10_2());
14637 match(Set dst (ConvD2L src));
14638 effect(KILL cr);
14639 format %{ "convert_d2l $dst, $src"%}
14640 ins_encode %{
14641 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14642 %}
14643 ins_pipe(pipe_slow);
14644 %}
14645
14646 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14647 %{
14648 predicate(VM_Version::supports_avx10_2());
14649 match(Set dst (ConvD2L src));
14650 format %{ "evcvttsd2sisq $dst, $src" %}
14651 ins_encode %{
14652 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14653 %}
14654 ins_pipe(pipe_slow);
14655 %}
14656
14657 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14658 %{
14659 predicate(VM_Version::supports_avx10_2());
14660 match(Set dst (ConvD2L (LoadD src)));
14661 format %{ "evcvttsd2sisq $dst, $src" %}
14662 ins_encode %{
14663 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14664 %}
14665 ins_pipe(pipe_slow);
14666 %}
14667
14668 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14669 %{
14670 match(Set dst (RoundD src));
14671 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14672 format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14673 ins_encode %{
14674 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14675 %}
14676 ins_pipe(pipe_slow);
14677 %}
14678
14679 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14680 %{
14681 match(Set dst (RoundF src));
14682 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14683 format %{ "round_float $dst,$src" %}
14684 ins_encode %{
14685 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14686 %}
14687 ins_pipe(pipe_slow);
14688 %}
14689
14690 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14691 %{
14692 predicate(!UseXmmI2F);
14693 match(Set dst (ConvI2F src));
14694
14695 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14696 ins_encode %{
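// Zero dst first on AVX targets: cvtsi2ssl merges into the low 32 bits and
// leaves the rest of the register untouched, which would otherwise create a
// false dependency on dst's previous contents.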
14697 if (UseAVX > 0) {
14698 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14699 }
14700 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14701 %}
14702 ins_pipe(pipe_slow); // XXX
14703 %}
14704
14705 instruct convI2F_reg_mem(regF dst, memory src)
14706 %{
14707 predicate(UseAVX == 0);
14708 match(Set dst (ConvI2F (LoadI src)));
14709
14710 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14711 ins_encode %{
14712 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14713 %}
14714 ins_pipe(pipe_slow); // XXX
14715 %}
14716
14717 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14718 %{
14719 predicate(!UseXmmI2D);
14720 match(Set dst (ConvI2D src));
14721
14722 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14723 ins_encode %{
14724 if (UseAVX > 0) {
14725 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14726 }
14727 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14728 %}
14729 ins_pipe(pipe_slow); // XXX
14730 %}
14731
14732 instruct convI2D_reg_mem(regD dst, memory src)
14733 %{
14734 predicate(UseAVX == 0);
14735 match(Set dst (ConvI2D (LoadI src)));
14736
14737 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14738 ins_encode %{
14739 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14740 %}
14741 ins_pipe(pipe_slow); // XXX
14742 %}
14743
14744 instruct convXI2F_reg(regF dst, rRegI src)
14745 %{
14746 predicate(UseXmmI2F);
14747 match(Set dst (ConvI2F src));
14748
14749 format %{ "movdl $dst, $src\n\t"
14750 "cvtdq2ps $dst, $dst\t# i2f" %}
14751 ins_encode %{
14752 __ movdl($dst$$XMMRegister, $src$$Register);
14753 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14754 %}
14755 ins_pipe(pipe_slow); // XXX
14756 %}
14757
14758 instruct convXI2D_reg(regD dst, rRegI src)
14759 %{
14760 predicate(UseXmmI2D);
14761 match(Set dst (ConvI2D src));
14762
14763 format %{ "movdl $dst, $src\n\t"
14764 "cvtdq2pd $dst, $dst\t# i2d" %}
14765 ins_encode %{
14766 __ movdl($dst$$XMMRegister, $src$$Register);
14767 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14768 %}
14769 ins_pipe(pipe_slow); // XXX
14770 %}
14771
14772 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14773 %{
14774 match(Set dst (ConvL2F src));
14775
14776 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14777 ins_encode %{
14778 if (UseAVX > 0) {
14779 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14780 }
14781 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14782 %}
14783 ins_pipe(pipe_slow); // XXX
14784 %}
14785
14786 instruct convL2F_reg_mem(regF dst, memory src)
14787 %{
14788 predicate(UseAVX == 0);
14789 match(Set dst (ConvL2F (LoadL src)));
14790
14791 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14792 ins_encode %{
14793 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14794 %}
14795 ins_pipe(pipe_slow); // XXX
14796 %}
14797
14798 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14799 %{
14800 match(Set dst (ConvL2D src));
14801
14802 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14803 ins_encode %{
14804 if (UseAVX > 0) {
14805 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14806 }
14807 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14808 %}
14809 ins_pipe(pipe_slow); // XXX
14810 %}
14811
14812 instruct convL2D_reg_mem(regD dst, memory src)
14813 %{
14814 predicate(UseAVX == 0);
14815 match(Set dst (ConvL2D (LoadL src)));
14816
14817 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14818 ins_encode %{
14819 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14820 %}
14821 ins_pipe(pipe_slow); // XXX
14822 %}
14823
14824 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14825 %{
14826 match(Set dst (ConvI2L src));
14827
14828 ins_cost(125);
14829 format %{ "movslq $dst, $src\t# i2l" %}
14830 ins_encode %{
14831 __ movslq($dst$$Register, $src$$Register);
14832 %}
14833 ins_pipe(ialu_reg_reg);
14834 %}
14835
14836 // Zero-extend convert int to long
14837 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14838 %{
14839 match(Set dst (AndL (ConvI2L src) mask));
14840
14841 format %{ "movl $dst, $src\t# i2l zero-extend" %}
14842 ins_encode %{
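// In 64-bit mode a 32-bit mov implicitly clears bits 63:32, so movl alone
// performs the zero-extension; when dst and src are already the same
// register no instruction is needed at all.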
14843 if ($dst$$reg != $src$$reg) {
14844 __ movl($dst$$Register, $src$$Register);
14845 }
14846 %}
14847 ins_pipe(ialu_reg_reg);
14848 %}
14849
14850 // Zero-extend convert int to long
14851 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14852 %{
14853 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14854
14855 format %{ "movl $dst, $src\t# i2l zero-extend" %}
14856 ins_encode %{
14857 __ movl($dst$$Register, $src$$Address);
14858 %}
14859 ins_pipe(ialu_reg_mem);
14860 %}
14861
14862 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14863 %{
14864 match(Set dst (AndL src mask));
14865
14866 format %{ "movl $dst, $src\t# zero-extend long" %}
14867 ins_encode %{
14868 __ movl($dst$$Register, $src$$Register);
14869 %}
14870 ins_pipe(ialu_reg_reg);
14871 %}
14872
14873 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14874 %{
14875 match(Set dst (ConvL2I src));
14876
14877 format %{ "movl $dst, $src\t# l2i" %}
14878 ins_encode %{
14879 __ movl($dst$$Register, $src$$Register);
14880 %}
14881 ins_pipe(ialu_reg_reg);
14882 %}
14883
14884
14885 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14886 match(Set dst (MoveF2I src));
14887 effect(DEF dst, USE src);
14888
14889 ins_cost(125);
14890 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14891 ins_encode %{
14892 __ movl($dst$$Register, Address(rsp, $src$$disp));
14893 %}
14894 ins_pipe(ialu_reg_mem);
14895 %}
14896
14897 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14898 match(Set dst (MoveI2F src));
14899 effect(DEF dst, USE src);
14900
14901 ins_cost(125);
14902 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14903 ins_encode %{
14904 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14905 %}
14906 ins_pipe(pipe_slow);
14907 %}
14908
14909 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14910 match(Set dst (MoveD2L src));
14911 effect(DEF dst, USE src);
14912
14913 ins_cost(125);
14914 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14915 ins_encode %{
14916 __ movq($dst$$Register, Address(rsp, $src$$disp));
14917 %}
14918 ins_pipe(ialu_reg_mem);
14919 %}
14920
14921 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14922 predicate(!UseXmmLoadAndClearUpper);
14923 match(Set dst (MoveL2D src));
14924 effect(DEF dst, USE src);
14925
14926 ins_cost(125);
14927 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14928 ins_encode %{
14929 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14930 %}
14931 ins_pipe(pipe_slow);
14932 %}
14933
14934 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14935 predicate(UseXmmLoadAndClearUpper);
14936 match(Set dst (MoveL2D src));
14937 effect(DEF dst, USE src);
14938
14939 ins_cost(125);
14940 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14941 ins_encode %{
14942 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14943 %}
14944 ins_pipe(pipe_slow);
14945 %}
14946
14947
14948 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14949 match(Set dst (MoveF2I src));
14950 effect(DEF dst, USE src);
14951
14952 ins_cost(95); // XXX
14953 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
14954 ins_encode %{
14955 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14956 %}
14957 ins_pipe(pipe_slow);
14958 %}
14959
14960 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14961 match(Set dst (MoveI2F src));
14962 effect(DEF dst, USE src);
14963
14964 ins_cost(100);
14965 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
14966 ins_encode %{
14967 __ movl(Address(rsp, $dst$$disp), $src$$Register);
14968 %}
14969 ins_pipe( ialu_mem_reg );
14970 %}
14971
14972 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14973 match(Set dst (MoveD2L src));
14974 effect(DEF dst, USE src);
14975
14976 ins_cost(95); // XXX
14977 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
14978 ins_encode %{
14979 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14980 %}
14981 ins_pipe(pipe_slow);
14982 %}
14983
14984 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14985 match(Set dst (MoveL2D src));
14986 effect(DEF dst, USE src);
14987
14988 ins_cost(100);
14989 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
14990 ins_encode %{
14991 __ movq(Address(rsp, $dst$$disp), $src$$Register);
14992 %}
14993 ins_pipe(ialu_mem_reg);
14994 %}
14995
14996 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14997 match(Set dst (MoveF2I src));
14998 effect(DEF dst, USE src);
14999 ins_cost(85);
15000 format %{ "movd $dst,$src\t# MoveF2I" %}
15001 ins_encode %{
15002 __ movdl($dst$$Register, $src$$XMMRegister);
15003 %}
15004 ins_pipe( pipe_slow );
15005 %}
15006
15007 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15008 match(Set dst (MoveD2L src));
15009 effect(DEF dst, USE src);
15010 ins_cost(85);
15011 format %{ "movd $dst,$src\t# MoveD2L" %}
15012 ins_encode %{
15013 __ movdq($dst$$Register, $src$$XMMRegister);
15014 %}
15015 ins_pipe( pipe_slow );
15016 %}
15017
15018 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15019 match(Set dst (MoveI2F src));
15020 effect(DEF dst, USE src);
15021 ins_cost(100);
15022 format %{ "movd $dst,$src\t# MoveI2F" %}
15023 ins_encode %{
15024 __ movdl($dst$$XMMRegister, $src$$Register);
15025 %}
15026 ins_pipe( pipe_slow );
15027 %}
15028
15029 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15030 match(Set dst (MoveL2D src));
15031 effect(DEF dst, USE src);
15032 ins_cost(100);
15033 format %{ "movd $dst,$src\t# MoveL2D" %}
15034 ins_encode %{
15035 __ movdq($dst$$XMMRegister, $src$$Register);
15036 %}
15037 ins_pipe( pipe_slow );
15038 %}
15039
15040 // Fast clearing of an array
15041 // Small non-constant length ClearArray for non-AVX512 targets.
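// The emitted code (sketched in the format template below) first compares the
// doubleword count against InitArrayShortSize: short arrays are zeroed with a
// simple 8-byte store loop, while larger ones dispatch on UseFastStosb
// (rep stosb), UseXMMForObjInit (a 32-byte vmovdqu loop plus tail), or plain
// rep stosq.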
15042 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15043 Universe dummy, rFlagsReg cr)
15044 %{
15045 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15046 match(Set dummy (ClearArray cnt base));
15047 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15048
15049 format %{ $$template
15050 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15051 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15052 $$emit$$"jg LARGE\n\t"
15053 $$emit$$"dec rcx\n\t"
15054 $$emit$$"js DONE\t# Zero length\n\t"
15055 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15056 $$emit$$"dec rcx\n\t"
15057 $$emit$$"jge LOOP\n\t"
15058 $$emit$$"jmp DONE\n\t"
15059 $$emit$$"# LARGE:\n\t"
15060 if (UseFastStosb) {
15061 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15062 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15063 } else if (UseXMMForObjInit) {
15064 $$emit$$"mov rdi,rax\n\t"
15065 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15066 $$emit$$"jmpq L_zero_64_bytes\n\t"
15067 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15068 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15069 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15070 $$emit$$"add 0x40,rax\n\t"
15071 $$emit$$"# L_zero_64_bytes:\n\t"
15072 $$emit$$"sub 0x8,rcx\n\t"
15073 $$emit$$"jge L_loop\n\t"
15074 $$emit$$"add 0x4,rcx\n\t"
15075 $$emit$$"jl L_tail\n\t"
15076 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15077 $$emit$$"add 0x20,rax\n\t"
15078 $$emit$$"sub 0x4,rcx\n\t"
15079 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15080 $$emit$$"add 0x4,rcx\n\t"
15081 $$emit$$"jle L_end\n\t"
15082 $$emit$$"dec rcx\n\t"
15083 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15084 $$emit$$"vmovq xmm0,(rax)\n\t"
15085 $$emit$$"add 0x8,rax\n\t"
15086 $$emit$$"dec rcx\n\t"
15087 $$emit$$"jge L_sloop\n\t"
15088 $$emit$$"# L_end:\n\t"
15089 } else {
15090 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15091 }
15092 $$emit$$"# DONE"
15093 %}
15094 ins_encode %{
15095 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15096 $tmp$$XMMRegister, false, knoreg);
15097 %}
15098 ins_pipe(pipe_slow);
15099 %}
15100
15101 // Small non-constant length ClearArray for AVX512 targets.
15102 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15103 Universe dummy, rFlagsReg cr)
15104 %{
15105 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15106 match(Set dummy (ClearArray cnt base));
15107 ins_cost(125);
15108 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15109
15110 format %{ $$template
15111 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15112 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15113 $$emit$$"jg LARGE\n\t"
15114 $$emit$$"dec rcx\n\t"
15115 $$emit$$"js DONE\t# Zero length\n\t"
15116 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15117 $$emit$$"dec rcx\n\t"
15118 $$emit$$"jge LOOP\n\t"
15119 $$emit$$"jmp DONE\n\t"
15120 $$emit$$"# LARGE:\n\t"
15121 if (UseFastStosb) {
15122 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15123 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15124 } else if (UseXMMForObjInit) {
15125 $$emit$$"mov rdi,rax\n\t"
15126 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15127 $$emit$$"jmpq L_zero_64_bytes\n\t"
15128 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15129 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15130 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15131 $$emit$$"add 0x40,rax\n\t"
15132 $$emit$$"# L_zero_64_bytes:\n\t"
15133 $$emit$$"sub 0x8,rcx\n\t"
15134 $$emit$$"jge L_loop\n\t"
15135 $$emit$$"add 0x4,rcx\n\t"
15136 $$emit$$"jl L_tail\n\t"
15137 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15138 $$emit$$"add 0x20,rax\n\t"
15139 $$emit$$"sub 0x4,rcx\n\t"
15140 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15141 $$emit$$"add 0x4,rcx\n\t"
15142 $$emit$$"jle L_end\n\t"
15143 $$emit$$"dec rcx\n\t"
15144 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15145 $$emit$$"vmovq xmm0,(rax)\n\t"
15146 $$emit$$"add 0x8,rax\n\t"
15147 $$emit$$"dec rcx\n\t"
15148 $$emit$$"jge L_sloop\n\t"
15149 $$emit$$"# L_end:\n\t"
15150 } else {
15151 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15152 }
15153 $$emit$$"# DONE"
15154 %}
15155 ins_encode %{
15156 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15157 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15158 %}
15159 ins_pipe(pipe_slow);
15160 %}
15161
15162 // Large non-constant length ClearArray for non-AVX512 targets.
15163 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15164 Universe dummy, rFlagsReg cr)
15165 %{
15166 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15167 match(Set dummy (ClearArray cnt base));
15168 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15169
15170 format %{ $$template
15171 if (UseFastStosb) {
15172 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15173 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15174 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15175 } else if (UseXMMForObjInit) {
15176 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15177 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15178 $$emit$$"jmpq L_zero_64_bytes\n\t"
15179 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15180 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15181 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15182 $$emit$$"add 0x40,rax\n\t"
15183 $$emit$$"# L_zero_64_bytes:\n\t"
15184 $$emit$$"sub 0x8,rcx\n\t"
15185 $$emit$$"jge L_loop\n\t"
15186 $$emit$$"add 0x4,rcx\n\t"
15187 $$emit$$"jl L_tail\n\t"
15188 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15189 $$emit$$"add 0x20,rax\n\t"
15190 $$emit$$"sub 0x4,rcx\n\t"
15191 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15192 $$emit$$"add 0x4,rcx\n\t"
15193 $$emit$$"jle L_end\n\t"
15194 $$emit$$"dec rcx\n\t"
15195 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15196 $$emit$$"vmovq xmm0,(rax)\n\t"
15197 $$emit$$"add 0x8,rax\n\t"
15198 $$emit$$"dec rcx\n\t"
15199 $$emit$$"jge L_sloop\n\t"
15200 $$emit$$"# L_end:\n\t"
15201 } else {
15202 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15203 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15204 }
15205 %}
15206 ins_encode %{
15207 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15208 $tmp$$XMMRegister, true, knoreg);
15209 %}
15210 ins_pipe(pipe_slow);
15211 %}
15212
15213 // Large non-constant length ClearArray for AVX512 targets.
15214 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15215 Universe dummy, rFlagsReg cr)
15216 %{
15217 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15218 match(Set dummy (ClearArray cnt base));
15219 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15220
15221 format %{ $$template
15222 if (UseFastStosb) {
15223 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15224 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15225 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15226 } else if (UseXMMForObjInit) {
15227 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15228 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15229 $$emit$$"jmpq L_zero_64_bytes\n\t"
15230 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15231 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15233 $$emit$$"add 0x40,rax\n\t"
15234 $$emit$$"# L_zero_64_bytes:\n\t"
15235 $$emit$$"sub 0x8,rcx\n\t"
15236 $$emit$$"jge L_loop\n\t"
15237 $$emit$$"add 0x4,rcx\n\t"
15238 $$emit$$"jl L_tail\n\t"
15239 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15240 $$emit$$"add 0x20,rax\n\t"
15241 $$emit$$"sub 0x4,rcx\n\t"
15242 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15243 $$emit$$"add 0x4,rcx\n\t"
15244 $$emit$$"jle L_end\n\t"
15245 $$emit$$"dec rcx\n\t"
15246 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15247 $$emit$$"vmovq xmm0,(rax)\n\t"
15248 $$emit$$"add 0x8,rax\n\t"
15249 $$emit$$"dec rcx\n\t"
15250 $$emit$$"jge L_sloop\n\t"
15251 $$emit$$"# L_end:\n\t"
15252 } else {
15253 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15254 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15255 }
15256 %}
15257 ins_encode %{
15258 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15259 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15260 %}
15261 ins_pipe(pipe_slow);
15262 %}
15263
15264 // Small constant length ClearArray for AVX512 targets.
15265 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15266 %{
15267 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15268 match(Set dummy (ClearArray cnt base));
15269 ins_cost(100);
15270 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15271 format %{ "clear_mem_imm $base, $cnt" %}
15272 ins_encode %{
15273 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15274 %}
15275 ins_pipe(pipe_slow);
15276 %}
15277
15278 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15279 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15280 %{
15281 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15282 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15283 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15284
15285 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15286 ins_encode %{
15287 __ string_compare($str1$$Register, $str2$$Register,
15288 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15289 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15290 %}
15291 ins_pipe( pipe_slow );
15292 %}
15293
15294 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15295 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15296 %{
15297 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15298 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15299 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15300
15301 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15302 ins_encode %{
15303 __ string_compare($str1$$Register, $str2$$Register,
15304 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15305 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15306 %}
15307 ins_pipe( pipe_slow );
15308 %}
15309
15310 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15311 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15312 %{
15313 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15314 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15315 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15316
15317 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15318 ins_encode %{
15319 __ string_compare($str1$$Register, $str2$$Register,
15320 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15321 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15322 %}
15323 ins_pipe( pipe_slow );
15324 %}
15325
15326 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15327 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15328 %{
15329 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15330 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15331 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15332
15333 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15334 ins_encode %{
15335 __ string_compare($str1$$Register, $str2$$Register,
15336 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15337 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15338 %}
15339 ins_pipe( pipe_slow );
15340 %}
15341
15342 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15343 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15344 %{
15345 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15346 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15347 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15348
15349 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15350 ins_encode %{
15351 __ string_compare($str1$$Register, $str2$$Register,
15352 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15353 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15354 %}
15355 ins_pipe( pipe_slow );
15356 %}
15357
15358 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15359 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15360 %{
15361 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15362 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15363 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15364
15365 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15366 ins_encode %{
15367 __ string_compare($str1$$Register, $str2$$Register,
15368 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15369 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15370 %}
15371 ins_pipe( pipe_slow );
15372 %}
15373
15374 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15375 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15376 %{
15377 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15378 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15379 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15380
15381 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15382 ins_encode %{
15383 __ string_compare($str2$$Register, $str1$$Register,
15384 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15385 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15386 %}
15387 ins_pipe( pipe_slow );
15388 %}
15389
15390 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15391 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15392 %{
15393 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15394 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15395 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15396
15397 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15398 ins_encode %{
15399 __ string_compare($str2$$Register, $str1$$Register,
15400 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15401 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15402 %}
15403 ins_pipe( pipe_slow );
15404 %}
15405
15406 // fast search of substring with known size.
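// When the needle length is a compile-time constant, string_indexofC8 can be
// used for needles of at least 16 bytes (16 Latin-1 bytes or 8 UTF-16 chars),
// which are safe to load directly; shorter constant needles go through
// string_indexof, which stages them via the stack when a load could cross a
// page boundary (a paraphrase of the per-rule comments below).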
15407 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15408 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15409 %{
15410 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15411 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15412 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15413
15414 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15415 ins_encode %{
15416 int icnt2 = (int)$int_cnt2$$constant;
15417 if (icnt2 >= 16) {
15418 // IndexOf for constant substrings with size >= 16 elements
15419 // which don't need to be loaded through stack.
15420 __ string_indexofC8($str1$$Register, $str2$$Register,
15421 $cnt1$$Register, $cnt2$$Register,
15422 icnt2, $result$$Register,
15423 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15424 } else {
15425 // Small strings are loaded through stack if they cross page boundary.
15426 __ string_indexof($str1$$Register, $str2$$Register,
15427 $cnt1$$Register, $cnt2$$Register,
15428 icnt2, $result$$Register,
15429 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15430 }
15431 %}
15432 ins_pipe( pipe_slow );
15433 %}
15434
15435 // fast search of substring with known size.
15436 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15437 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15438 %{
15439 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15440 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15441 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15442
15443 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15444 ins_encode %{
15445 int icnt2 = (int)$int_cnt2$$constant;
15446 if (icnt2 >= 8) {
15447 // IndexOf for constant substrings with size >= 8 elements
15448 // which don't need to be loaded through stack.
15449 __ string_indexofC8($str1$$Register, $str2$$Register,
15450 $cnt1$$Register, $cnt2$$Register,
15451 icnt2, $result$$Register,
15452 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15453 } else {
15454 // Small strings are loaded through stack if they cross page boundary.
15455 __ string_indexof($str1$$Register, $str2$$Register,
15456 $cnt1$$Register, $cnt2$$Register,
15457 icnt2, $result$$Register,
15458 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15459 }
15460 %}
15461 ins_pipe( pipe_slow );
15462 %}
15463
15464 // fast search of substring with known size.
15465 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15466 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15467 %{
15468 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15469 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15470 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15471
15472 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15473 ins_encode %{
15474 int icnt2 = (int)$int_cnt2$$constant;
15475 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15478 __ string_indexofC8($str1$$Register, $str2$$Register,
15479 $cnt1$$Register, $cnt2$$Register,
15480 icnt2, $result$$Register,
15481 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15482 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15484 __ string_indexof($str1$$Register, $str2$$Register,
15485 $cnt1$$Register, $cnt2$$Register,
15486 icnt2, $result$$Register,
15487 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15488 }
15489 %}
15490 ins_pipe( pipe_slow );
15491 %}
15492
15493 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15494 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15495 %{
15496 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15497 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15498 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15499
15500 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15501 ins_encode %{
15502 __ string_indexof($str1$$Register, $str2$$Register,
15503 $cnt1$$Register, $cnt2$$Register,
15504 (-1), $result$$Register,
15505 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15506 %}
15507 ins_pipe( pipe_slow );
15508 %}
15509
15510 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15511 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15512 %{
15513 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15514 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15515 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15516
15517 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15518 ins_encode %{
15519 __ string_indexof($str1$$Register, $str2$$Register,
15520 $cnt1$$Register, $cnt2$$Register,
15521 (-1), $result$$Register,
15522 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15523 %}
15524 ins_pipe( pipe_slow );
15525 %}
15526
15527 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15528 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15529 %{
15530 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15531 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15532 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15533
15534 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15535 ins_encode %{
15536 __ string_indexof($str1$$Register, $str2$$Register,
15537 $cnt1$$Register, $cnt2$$Register,
15538 (-1), $result$$Register,
15539 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15540 %}
15541 ins_pipe( pipe_slow );
15542 %}
15543
15544 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15545 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15546 %{
15547 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15548 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15549 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15550 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15551 ins_encode %{
15552 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15553 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15554 %}
15555 ins_pipe( pipe_slow );
15556 %}
15557
15558 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15559 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15560 %{
15561 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15562 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15563 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15564 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15565 ins_encode %{
15566 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15567 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15568 %}
15569 ins_pipe( pipe_slow );
15570 %}
15571
15572 // fast string equals
15573 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15574 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15575 %{
15576 predicate(!VM_Version::supports_avx512vlbw());
15577 match(Set result (StrEquals (Binary str1 str2) cnt));
15578 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15579
15580 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15581 ins_encode %{
15582 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15583 $cnt$$Register, $result$$Register, $tmp3$$Register,
15584 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15585 %}
15586 ins_pipe( pipe_slow );
15587 %}
15588
15589 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15590 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15591 %{
15592 predicate(VM_Version::supports_avx512vlbw());
15593 match(Set result (StrEquals (Binary str1 str2) cnt));
15594 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15595
15596 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15597 ins_encode %{
15598 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15599 $cnt$$Register, $result$$Register, $tmp3$$Register,
15600 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15601 %}
15602 ins_pipe( pipe_slow );
15603 %}
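
// Note the pairing used throughout this section: a legacy SSE/AVX flavor
// guarded by !VM_Version::supports_avx512vlbw() and an "_evex" flavor that
// additionally reserves kReg mask temps and passes them down, so the macro
// assembler can finish the partial tail with AVX-512 masked loads instead
// of scalar cleanup code.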
15604
15605 // fast array equals
15606 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15607 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15608 %{
15609 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15610 match(Set result (AryEq ary1 ary2));
15611 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15612
15613 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15614 ins_encode %{
15615 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15616 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15617 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15618 %}
15619 ins_pipe( pipe_slow );
15620 %}
15621
15622 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15623 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15624 %{
15625 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15626 match(Set result (AryEq ary1 ary2));
15627 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15628
15629 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15630 ins_encode %{
15631 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15632 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15633 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15634 %}
15635 ins_pipe( pipe_slow );
15636 %}
15637
15638 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15639 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15640 %{
15641 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15642 match(Set result (AryEq ary1 ary2));
15643 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15644
15645 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15646 ins_encode %{
15647 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15648 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15649 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15650 %}
15651 ins_pipe( pipe_slow );
15652 %}
15653
15654 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15655 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15656 %{
15657 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15658 match(Set result (AryEq ary1 ary2));
15659 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15660
15661 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15662 ins_encode %{
15663 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15664 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15665 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15666 %}
15667 ins_pipe( pipe_slow );
15668 %}
15669
15670 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15671 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15672 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15673 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15674 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15675 %{
15676 predicate(UseAVX >= 2);
15677 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15678 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15679 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15680 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15681 USE basic_type, KILL cr);
15682
15683 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15684 ins_encode %{
15685 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15686 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15687 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15688 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15689 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15690 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15691 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15692 %}
15693 ins_pipe( pipe_slow );
15694 %}
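
// $result appears on both sides of the match rule above: the incoming
// value is the initial hash accumulator that the vectorized loop folds the
// array elements into, which is why it is listed as an input via
// (Binary result basic_type) as well as being the destination.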
15695
15696 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15698 %{
15699 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15700 match(Set result (CountPositives ary1 len));
15701 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15702
15703 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15704 ins_encode %{
15705 __ count_positives($ary1$$Register, $len$$Register,
15706 $result$$Register, $tmp3$$Register,
15707 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15708 %}
15709 ins_pipe( pipe_slow );
15710 %}
15711
15712 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15714 %{
15715 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15716 match(Set result (CountPositives ary1 len));
15717 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15718
15719 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15720 ins_encode %{
15721 __ count_positives($ary1$$Register, $len$$Register,
15722 $result$$Register, $tmp3$$Register,
15723 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15724 %}
15725 ins_pipe( pipe_slow );
15726 %}
15727
15728 // fast char[] to byte[] compression
15729 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15730 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15731 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15732 match(Set result (StrCompressedCopy src (Binary dst len)));
15733 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15734 USE_KILL len, KILL tmp5, KILL cr);
15735
15736 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15737 ins_encode %{
15738 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15739 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15740 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15741 knoreg, knoreg);
15742 %}
15743 ins_pipe( pipe_slow );
15744 %}
15745
15746 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15747 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15748 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15749 match(Set result (StrCompressedCopy src (Binary dst len)));
15750 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15751 USE_KILL len, KILL tmp5, KILL cr);
15752
15753 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15754 ins_encode %{
15755 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15756 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15757 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15758 $ktmp1$$KRegister, $ktmp2$$KRegister);
15759 %}
15760 ins_pipe( pipe_slow );
15761 %}

// fast byte[] to char[] inflation
15763 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15764 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15765 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15766 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15767 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15768
15769 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15770 ins_encode %{
15771 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15772 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15773 %}
15774 ins_pipe( pipe_slow );
15775 %}
15776
15777 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15778 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15779 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15780 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15781 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15782
15783 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15784 ins_encode %{
15785 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15786 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15787 %}
15788 ins_pipe( pipe_slow );
15789 %}
15790
15791 // encode char[] to byte[] in ISO_8859_1
15792 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15793 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15794 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15795 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15796 match(Set result (EncodeISOArray src (Binary dst len)));
15797 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15798
15799 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15800 ins_encode %{
15801 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15802 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15803 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15804 %}
15805 ins_pipe( pipe_slow );
15806 %}
15807
15808 // encode char[] to byte[] in ASCII
15809 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15810 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15811 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15812 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15813 match(Set result (EncodeISOArray src (Binary dst len)));
15814 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15815
15816 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15817 ins_encode %{
15818 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15819 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15820 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15821 %}
15822 ins_pipe( pipe_slow );
15823 %}
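
// Both encoders above share one macro-assembler routine; the trailing
// boolean selects ASCII mode (a 0x7F cut-off instead of 0xFF), and the
// is_ascii() predicates keep the two rules from ever matching the same
// node.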
15824
15825 //----------Overflow Math Instructions-----------------------------------------
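
// These rules back the Math.*Exact intrinsics: C2 turns, e.g.,
// Math.addExact(int, int) into an OverflowAddI node whose rFlagsReg result
// feeds a branch on the overflow condition, with the deoptimizing path
// taken when OF is set. op1 is USE_KILL wherever the x86 instruction
// overwrites its destination operand.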
15826
15827 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15828 %{
15829 match(Set cr (OverflowAddI op1 op2));
15830 effect(DEF cr, USE_KILL op1, USE op2);
15831
15832 format %{ "addl $op1, $op2\t# overflow check int" %}
15833
15834 ins_encode %{
15835 __ addl($op1$$Register, $op2$$Register);
15836 %}
15837 ins_pipe(ialu_reg_reg);
15838 %}
15839
15840 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15841 %{
15842 match(Set cr (OverflowAddI op1 op2));
15843 effect(DEF cr, USE_KILL op1, USE op2);
15844
15845 format %{ "addl $op1, $op2\t# overflow check int" %}
15846
15847 ins_encode %{
15848 __ addl($op1$$Register, $op2$$constant);
15849 %}
15850 ins_pipe(ialu_reg_reg);
15851 %}
15852
15853 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15854 %{
15855 match(Set cr (OverflowAddL op1 op2));
15856 effect(DEF cr, USE_KILL op1, USE op2);
15857
15858 format %{ "addq $op1, $op2\t# overflow check long" %}
15859 ins_encode %{
15860 __ addq($op1$$Register, $op2$$Register);
15861 %}
15862 ins_pipe(ialu_reg_reg);
15863 %}
15864
15865 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15866 %{
15867 match(Set cr (OverflowAddL op1 op2));
15868 effect(DEF cr, USE_KILL op1, USE op2);
15869
15870 format %{ "addq $op1, $op2\t# overflow check long" %}
15871 ins_encode %{
15872 __ addq($op1$$Register, $op2$$constant);
15873 %}
15874 ins_pipe(ialu_reg_reg);
15875 %}
15876
15877 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15878 %{
15879 match(Set cr (OverflowSubI op1 op2));
15880
15881 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15882 ins_encode %{
15883 __ cmpl($op1$$Register, $op2$$Register);
15884 %}
15885 ins_pipe(ialu_reg_reg);
15886 %}
15887
15888 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15889 %{
15890 match(Set cr (OverflowSubI op1 op2));
15891
15892 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15893 ins_encode %{
15894 __ cmpl($op1$$Register, $op2$$constant);
15895 %}
15896 ins_pipe(ialu_reg_reg);
15897 %}
15898
15899 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15900 %{
15901 match(Set cr (OverflowSubL op1 op2));
15902
15903 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15904 ins_encode %{
15905 __ cmpq($op1$$Register, $op2$$Register);
15906 %}
15907 ins_pipe(ialu_reg_reg);
15908 %}
15909
15910 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15911 %{
15912 match(Set cr (OverflowSubL op1 op2));
15913
15914 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15915 ins_encode %{
15916 __ cmpq($op1$$Register, $op2$$constant);
15917 %}
15918 ins_pipe(ialu_reg_reg);
15919 %}
15920
15921 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15922 %{
15923 match(Set cr (OverflowSubI zero op2));
15924 effect(DEF cr, USE_KILL op2);
15925
15926 format %{ "negl $op2\t# overflow check int" %}
15927 ins_encode %{
15928 __ negl($op2$$Register);
15929 %}
15930 ins_pipe(ialu_reg_reg);
15931 %}
15932
15933 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15934 %{
15935 match(Set cr (OverflowSubL zero op2));
15936 effect(DEF cr, USE_KILL op2);
15937
15938 format %{ "negq $op2\t# overflow check long" %}
15939 ins_encode %{
15940 __ negq($op2$$Register);
15941 %}
15942 ins_pipe(ialu_reg_reg);
15943 %}
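
// An OverflowSub with a literal zero on the left is a negate, so it can be
// matched to the one-operand negl/negq; neg sets OF exactly when its
// operand is MIN_VALUE, the only value whose negation overflows.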
15944
15945 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15946 %{
15947 match(Set cr (OverflowMulI op1 op2));
15948 effect(DEF cr, USE_KILL op1, USE op2);
15949
15950 format %{ "imull $op1, $op2\t# overflow check int" %}
15951 ins_encode %{
15952 __ imull($op1$$Register, $op2$$Register);
15953 %}
15954 ins_pipe(ialu_reg_reg_alu0);
15955 %}
15956
15957 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15958 %{
15959 match(Set cr (OverflowMulI op1 op2));
15960 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15961
15962 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
15963 ins_encode %{
15964 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15965 %}
15966 ins_pipe(ialu_reg_reg_alu0);
15967 %}
15968
15969 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15970 %{
15971 match(Set cr (OverflowMulL op1 op2));
15972 effect(DEF cr, USE_KILL op1, USE op2);
15973
15974 format %{ "imulq $op1, $op2\t# overflow check long" %}
15975 ins_encode %{
15976 __ imulq($op1$$Register, $op2$$Register);
15977 %}
15978 ins_pipe(ialu_reg_reg_alu0);
15979 %}
15980
15981 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15982 %{
15983 match(Set cr (OverflowMulL op1 op2));
15984 effect(DEF cr, TEMP tmp, USE op1, USE op2);
15985
15986 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
15987 ins_encode %{
15988 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15989 %}
15990 ins_pipe(ialu_reg_reg_alu0);
15991 %}
15992
15993
15994 //----------Control Flow Instructions------------------------------------------
15995 // Signed compare Instructions
15996
15997 // XXX more variants!!
15998 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15999 %{
16000 match(Set cr (CmpI op1 op2));
16001 effect(DEF cr, USE op1, USE op2);
16002
16003 format %{ "cmpl $op1, $op2" %}
16004 ins_encode %{
16005 __ cmpl($op1$$Register, $op2$$Register);
16006 %}
16007 ins_pipe(ialu_cr_reg_reg);
16008 %}
16009
16010 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16011 %{
16012 match(Set cr (CmpI op1 op2));
16013
16014 format %{ "cmpl $op1, $op2" %}
16015 ins_encode %{
16016 __ cmpl($op1$$Register, $op2$$constant);
16017 %}
16018 ins_pipe(ialu_cr_reg_imm);
16019 %}
16020
16021 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16022 %{
16023 match(Set cr (CmpI op1 (LoadI op2)));
16024
16025 ins_cost(500); // XXX
16026 format %{ "cmpl $op1, $op2" %}
16027 ins_encode %{
16028 __ cmpl($op1$$Register, $op2$$Address);
16029 %}
16030 ins_pipe(ialu_cr_reg_mem);
16031 %}
16032
16033 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16034 %{
16035 match(Set cr (CmpI src zero));
16036
16037 format %{ "testl $src, $src" %}
16038 ins_encode %{
16039 __ testl($src$$Register, $src$$Register);
16040 %}
16041 ins_pipe(ialu_cr_reg_imm);
16042 %}
16043
16044 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16045 %{
16046 match(Set cr (CmpI (AndI src con) zero));
16047
16048 format %{ "testl $src, $con" %}
16049 ins_encode %{
16050 __ testl($src$$Register, $con$$constant);
16051 %}
16052 ins_pipe(ialu_cr_reg_imm);
16053 %}
16054
16055 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16056 %{
16057 match(Set cr (CmpI (AndI src1 src2) zero));
16058
16059 format %{ "testl $src1, $src2" %}
16060 ins_encode %{
16061 __ testl($src1$$Register, $src2$$Register);
16062 %}
16063 ins_pipe(ialu_cr_reg_imm);
16064 %}
16065
16066 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16067 %{
16068 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16069
16070 format %{ "testl $src, $mem" %}
16071 ins_encode %{
16072 __ testl($src$$Register, $mem$$Address);
16073 %}
16074 ins_pipe(ialu_cr_reg_mem);
16075 %}
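
// The test* rules fold an AndI whose result is only compared with zero
// straight into the flags, so no integer register is written. For example,
// Java code such as
//   if ((flags & 0x10) == 0) { ... }
// matches testI_reg_imm and emits a single "testl reg, 0x10" in place of
// an andl followed by a compare.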
16076
16077 // Unsigned compare Instructions; really, same as signed except they
16078 // produce an rFlagsRegU instead of rFlagsReg.
16079 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16080 %{
16081 match(Set cr (CmpU op1 op2));
16082
16083 format %{ "cmpl $op1, $op2\t# unsigned" %}
16084 ins_encode %{
16085 __ cmpl($op1$$Register, $op2$$Register);
16086 %}
16087 ins_pipe(ialu_cr_reg_reg);
16088 %}
16089
16090 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16091 %{
16092 match(Set cr (CmpU op1 op2));
16093
16094 format %{ "cmpl $op1, $op2\t# unsigned" %}
16095 ins_encode %{
16096 __ cmpl($op1$$Register, $op2$$constant);
16097 %}
16098 ins_pipe(ialu_cr_reg_imm);
16099 %}
16100
16101 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16102 %{
16103 match(Set cr (CmpU op1 (LoadI op2)));
16104
16105 ins_cost(500); // XXX
16106 format %{ "cmpl $op1, $op2\t# unsigned" %}
16107 ins_encode %{
16108 __ cmpl($op1$$Register, $op2$$Address);
16109 %}
16110 ins_pipe(ialu_cr_reg_mem);
16111 %}
16112
16113 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16114 %{
16115 match(Set cr (CmpU src zero));
16116
16117 format %{ "testl $src, $src\t# unsigned" %}
16118 ins_encode %{
16119 __ testl($src$$Register, $src$$Register);
16120 %}
16121 ins_pipe(ialu_cr_reg_imm);
16122 %}
16123
16124 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16125 %{
16126 match(Set cr (CmpP op1 op2));
16127
16128 format %{ "cmpq $op1, $op2\t# ptr" %}
16129 ins_encode %{
16130 __ cmpq($op1$$Register, $op2$$Register);
16131 %}
16132 ins_pipe(ialu_cr_reg_reg);
16133 %}
16134
16135 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16136 %{
16137 match(Set cr (CmpP op1 (LoadP op2)));
16138 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16139
16140 ins_cost(500); // XXX
16141 format %{ "cmpq $op1, $op2\t# ptr" %}
16142 ins_encode %{
16143 __ cmpq($op1$$Register, $op2$$Address);
16144 %}
16145 ins_pipe(ialu_cr_reg_mem);
16146 %}
16147
16148 // XXX this is generalized by compP_rReg_mem???
16149 // Compare raw pointer (used in out-of-heap check).
16150 // Only works because non-oop pointers must be raw pointers
16151 // and raw pointers have no anti-dependencies.
16152 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16153 %{
16154 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16155 n->in(2)->as_Load()->barrier_data() == 0);
16156 match(Set cr (CmpP op1 (LoadP op2)));
16157
16158 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16159 ins_encode %{
16160 __ cmpq($op1$$Register, $op2$$Address);
16161 %}
16162 ins_pipe(ialu_cr_reg_mem);
16163 %}
16164
// This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16167 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16168 %{
16169 match(Set cr (CmpP src zero));
16170
16171 format %{ "testq $src, $src\t# ptr" %}
16172 ins_encode %{
16173 __ testq($src$$Register, $src$$Register);
16174 %}
16175 ins_pipe(ialu_cr_reg_imm);
16176 %}
16177
// This will generate a signed flags result. This should be OK since
// any compare against zero should be eq/neq.
16180 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16181 %{
16182 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16183 n->in(1)->as_Load()->barrier_data() == 0);
16184 match(Set cr (CmpP (LoadP op) zero));
16185
16186 ins_cost(500); // XXX
16187 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16188 ins_encode %{
16189 __ testq($op$$Address, 0xFFFFFFFF);
16190 %}
16191 ins_pipe(ialu_cr_reg_imm);
16192 %}
16193
16194 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16195 %{
16196 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16197 n->in(1)->as_Load()->barrier_data() == 0);
16198 match(Set cr (CmpP (LoadP mem) zero));
16199
16200 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16201 ins_encode %{
16202 __ cmpq(r12, $mem$$Address);
16203 %}
16204 ins_pipe(ialu_cr_reg_mem);
16205 %}
16206
16207 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16208 %{
16209 match(Set cr (CmpN op1 op2));
16210
16211 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16212 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16213 ins_pipe(ialu_cr_reg_reg);
16214 %}
16215
16216 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16217 %{
16218 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16219 match(Set cr (CmpN src (LoadN mem)));
16220
16221 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16222 ins_encode %{
16223 __ cmpl($src$$Register, $mem$$Address);
16224 %}
16225 ins_pipe(ialu_cr_reg_mem);
16226 %}
16227
16228 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16229 match(Set cr (CmpN op1 op2));
16230
16231 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16232 ins_encode %{
16233 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16234 %}
16235 ins_pipe(ialu_cr_reg_imm);
16236 %}
16237
16238 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16239 %{
16240 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16241 match(Set cr (CmpN src (LoadN mem)));
16242
16243 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16244 ins_encode %{
16245 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16246 %}
16247 ins_pipe(ialu_cr_reg_mem);
16248 %}
16249
16250 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16251 match(Set cr (CmpN op1 op2));
16252
16253 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16254 ins_encode %{
16255 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16256 %}
16257 ins_pipe(ialu_cr_reg_imm);
16258 %}
16259
16260 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16261 %{
16262 predicate(!UseCompactObjectHeaders);
16263 match(Set cr (CmpN src (LoadNKlass mem)));
16264
16265 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16266 ins_encode %{
16267 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16268 %}
16269 ins_pipe(ialu_cr_reg_mem);
16270 %}
16271
16272 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16273 match(Set cr (CmpN src zero));
16274
16275 format %{ "testl $src, $src\t# compressed ptr" %}
16276 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16277 ins_pipe(ialu_cr_reg_imm);
16278 %}
16279
16280 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16281 %{
16282 predicate(CompressedOops::base() != nullptr &&
16283 n->in(1)->as_Load()->barrier_data() == 0);
16284 match(Set cr (CmpN (LoadN mem) zero));
16285
16286 ins_cost(500); // XXX
16287 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16288 ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF); // all-ones mask: ZF set iff the narrow oop is null
16290 %}
16291 ins_pipe(ialu_cr_reg_mem);
16292 %}
16293
16294 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16295 %{
16296 predicate(CompressedOops::base() == nullptr &&
16297 n->in(1)->as_Load()->barrier_data() == 0);
16298 match(Set cr (CmpN (LoadN mem) zero));
16299
16300 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16301 ins_encode %{
16302 __ cmpl(r12, $mem$$Address);
16303 %}
16304 ins_pipe(ialu_cr_reg_mem);
16305 %}
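
// The *_reg0 rules above rely on r12 doubling as the compressed-oops heap
// base: with a zero (null) base, r12 always holds zero in compiled code,
// so a null check of an in-memory (narrow) oop becomes a compare against
// r12, avoiding a large immediate or an extra zeroed register.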
16306
16307 // Yanked all unsigned pointer compare operations.
16308 // Pointer compares are done with CmpP which is already unsigned.
16309
16310 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16311 %{
16312 match(Set cr (CmpL op1 op2));
16313
16314 format %{ "cmpq $op1, $op2" %}
16315 ins_encode %{
16316 __ cmpq($op1$$Register, $op2$$Register);
16317 %}
16318 ins_pipe(ialu_cr_reg_reg);
16319 %}
16320
16321 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16322 %{
16323 match(Set cr (CmpL op1 op2));
16324
16325 format %{ "cmpq $op1, $op2" %}
16326 ins_encode %{
16327 __ cmpq($op1$$Register, $op2$$constant);
16328 %}
16329 ins_pipe(ialu_cr_reg_imm);
16330 %}
16331
16332 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16333 %{
16334 match(Set cr (CmpL op1 (LoadL op2)));
16335
16336 format %{ "cmpq $op1, $op2" %}
16337 ins_encode %{
16338 __ cmpq($op1$$Register, $op2$$Address);
16339 %}
16340 ins_pipe(ialu_cr_reg_mem);
16341 %}
16342
16343 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16344 %{
16345 match(Set cr (CmpL src zero));
16346
16347 format %{ "testq $src, $src" %}
16348 ins_encode %{
16349 __ testq($src$$Register, $src$$Register);
16350 %}
16351 ins_pipe(ialu_cr_reg_imm);
16352 %}
16353
16354 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16355 %{
16356 match(Set cr (CmpL (AndL src con) zero));
16357
16358 format %{ "testq $src, $con\t# long" %}
16359 ins_encode %{
16360 __ testq($src$$Register, $con$$constant);
16361 %}
16362 ins_pipe(ialu_cr_reg_imm);
16363 %}
16364
16365 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16366 %{
16367 match(Set cr (CmpL (AndL src1 src2) zero));
16368
16369 format %{ "testq $src1, $src2\t# long" %}
16370 ins_encode %{
16371 __ testq($src1$$Register, $src2$$Register);
16372 %}
16373 ins_pipe(ialu_cr_reg_imm);
16374 %}
16375
16376 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16377 %{
16378 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16379
16380 format %{ "testq $src, $mem" %}
16381 ins_encode %{
16382 __ testq($src$$Register, $mem$$Address);
16383 %}
16384 ins_pipe(ialu_cr_reg_mem);
16385 %}
16386
16387 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16388 %{
16389 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16390
16391 format %{ "testq $src, $mem" %}
16392 ins_encode %{
16393 __ testq($src$$Register, $mem$$Address);
16394 %}
16395 ins_pipe(ialu_cr_reg_mem);
16396 %}
16397
16398 // Manifest a CmpU result in an integer register. Very painful.
16399 // This is the test to avoid.
16400 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16401 %{
16402 match(Set dst (CmpU3 src1 src2));
16403 effect(KILL flags);
16404
16405 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16411 ins_encode %{
16412 Label done;
16413 __ cmpl($src1$$Register, $src2$$Register);
16414 __ movl($dst$$Register, -1);
16415 __ jccb(Assembler::below, done);
16416 __ setcc(Assembler::notZero, $dst$$Register);
16417 __ bind(done);
16418 %}
16419 ins_pipe(pipe_slow);
16420 %}
16421
16422 // Manifest a CmpL result in an integer register. Very painful.
16423 // This is the test to avoid.
16424 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16425 %{
16426 match(Set dst (CmpL3 src1 src2));
16427 effect(KILL flags);
16428
16429 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16435 ins_encode %{
16436 Label done;
16437 __ cmpq($src1$$Register, $src2$$Register);
16438 __ movl($dst$$Register, -1);
16439 __ jccb(Assembler::less, done);
16440 __ setcc(Assembler::notZero, $dst$$Register);
16441 __ bind(done);
16442 %}
16443 ins_pipe(pipe_slow);
16444 %}
16445
16446 // Manifest a CmpUL result in an integer register. Very painful.
16447 // This is the test to avoid.
16448 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16449 %{
16450 match(Set dst (CmpUL3 src1 src2));
16451 effect(KILL flags);
16452
16453 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
16459 ins_encode %{
16460 Label done;
16461 __ cmpq($src1$$Register, $src2$$Register);
16462 __ movl($dst$$Register, -1);
16463 __ jccb(Assembler::below, done);
16464 __ setcc(Assembler::notZero, $dst$$Register);
16465 __ bind(done);
16466 %}
16467 ins_pipe(pipe_slow);
16468 %}
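
// All three Cmp*3 rules manifest -1/0/+1 the same way: preload $dst with
// -1, branch past the setcc when the first operand is smaller (signed
// "less" for CmpL3, unsigned "below" for CmpU3/CmpUL3), and otherwise let
// setcc(notZero) produce 0 on equality and 1 on greater/above.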
16469
16470 // Unsigned long compare Instructions; really, same as signed long except they
16471 // produce an rFlagsRegU instead of rFlagsReg.
16472 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16473 %{
16474 match(Set cr (CmpUL op1 op2));
16475
16476 format %{ "cmpq $op1, $op2\t# unsigned" %}
16477 ins_encode %{
16478 __ cmpq($op1$$Register, $op2$$Register);
16479 %}
16480 ins_pipe(ialu_cr_reg_reg);
16481 %}
16482
16483 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16484 %{
16485 match(Set cr (CmpUL op1 op2));
16486
16487 format %{ "cmpq $op1, $op2\t# unsigned" %}
16488 ins_encode %{
16489 __ cmpq($op1$$Register, $op2$$constant);
16490 %}
16491 ins_pipe(ialu_cr_reg_imm);
16492 %}
16493
16494 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16495 %{
16496 match(Set cr (CmpUL op1 (LoadL op2)));
16497
16498 format %{ "cmpq $op1, $op2\t# unsigned" %}
16499 ins_encode %{
16500 __ cmpq($op1$$Register, $op2$$Address);
16501 %}
16502 ins_pipe(ialu_cr_reg_mem);
16503 %}
16504
16505 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16506 %{
16507 match(Set cr (CmpUL src zero));
16508
16509 format %{ "testq $src, $src\t# unsigned" %}
16510 ins_encode %{
16511 __ testq($src$$Register, $src$$Register);
16512 %}
16513 ins_pipe(ialu_cr_reg_imm);
16514 %}
16515
16516 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16517 %{
16518 match(Set cr (CmpI (LoadB mem) imm));
16519
16520 ins_cost(125);
16521 format %{ "cmpb $mem, $imm" %}
16522 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16523 ins_pipe(ialu_cr_reg_mem);
16524 %}
16525
16526 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16527 %{
16528 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16529
16530 ins_cost(125);
16531 format %{ "testb $mem, $imm\t# ubyte" %}
16532 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16533 ins_pipe(ialu_cr_reg_mem);
16534 %}
16535
16536 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16537 %{
16538 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16539
16540 ins_cost(125);
16541 format %{ "testb $mem, $imm\t# byte" %}
16542 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16543 ins_pipe(ialu_cr_reg_mem);
16544 %}
16545
16546 //----------Max and Min--------------------------------------------------------
16547 // Min Instructions
16548
16549 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16550 %{
16551 predicate(!UseAPX);
16552 effect(USE_DEF dst, USE src, USE cr);
16553
16554 format %{ "cmovlgt $dst, $src\t# min" %}
16555 ins_encode %{
16556 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16557 %}
16558 ins_pipe(pipe_cmov_reg);
16559 %}
16560
16561 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16562 %{
16563 predicate(UseAPX);
16564 effect(DEF dst, USE src1, USE src2, USE cr);
16565
16566 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16567 ins_encode %{
16568 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16569 %}
16570 ins_pipe(pipe_cmov_reg);
16571 %}
16572
16573 instruct minI_rReg(rRegI dst, rRegI src)
16574 %{
16575 predicate(!UseAPX);
16576 match(Set dst (MinI dst src));
16577
16578 ins_cost(200);
16579 expand %{
16580 rFlagsReg cr;
16581 compI_rReg(cr, dst, src);
16582 cmovI_reg_g(dst, src, cr);
16583 %}
16584 %}
16585
16586 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16587 %{
16588 predicate(UseAPX);
16589 match(Set dst (MinI src1 src2));
16590 effect(DEF dst, USE src1, USE src2);
16591
16592 ins_cost(200);
16593 expand %{
16594 rFlagsReg cr;
16595 compI_rReg(cr, src1, src2);
16596 cmovI_reg_g_ndd(dst, src1, src2, cr);
16597 %}
16598 %}
16599
16600 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16601 %{
16602 predicate(!UseAPX);
16603 effect(USE_DEF dst, USE src, USE cr);
16604
16605 format %{ "cmovllt $dst, $src\t# max" %}
16606 ins_encode %{
16607 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16608 %}
16609 ins_pipe(pipe_cmov_reg);
16610 %}
16611
16612 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16613 %{
16614 predicate(UseAPX);
16615 effect(DEF dst, USE src1, USE src2, USE cr);
16616
16617 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16618 ins_encode %{
16619 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16620 %}
16621 ins_pipe(pipe_cmov_reg);
16622 %}
16623
16624 instruct maxI_rReg(rRegI dst, rRegI src)
16625 %{
16626 predicate(!UseAPX);
16627 match(Set dst (MaxI dst src));
16628
16629 ins_cost(200);
16630 expand %{
16631 rFlagsReg cr;
16632 compI_rReg(cr, dst, src);
16633 cmovI_reg_l(dst, src, cr);
16634 %}
16635 %}
16636
16637 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16638 %{
16639 predicate(UseAPX);
16640 match(Set dst (MaxI src1 src2));
16641 effect(DEF dst, USE src1, USE src2);
16642
16643 ins_cost(200);
16644 expand %{
16645 rFlagsReg cr;
16646 compI_rReg(cr, src1, src2);
16647 cmovI_reg_l_ndd(dst, src1, src2, cr);
16648 %}
16649 %}
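
// MinI/MaxI are expanded rather than encoded directly: the expand blocks
// stamp out a compare followed by one of the cmov helpers above. With APX
// (UseAPX) the new-data-destination (NDD) ecmovl writes a third register,
// so the rule can drop the USE_DEF constraint that otherwise ties dst to
// one of the inputs.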
16650
16651 // ============================================================================
16652 // Branch Instructions
16653
16654 // Jump Direct - Label defines a relative address from JMP+1
16655 instruct jmpDir(label labl)
16656 %{
16657 match(Goto);
16658 effect(USE labl);
16659
16660 ins_cost(300);
16661 format %{ "jmp $labl" %}
16662 size(5);
16663 ins_encode %{
16664 Label* L = $labl$$label;
16665 __ jmp(*L, false); // Always long jump
16666 %}
16667 ins_pipe(pipe_jmp);
16668 %}
16669
16670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16671 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16672 %{
16673 match(If cop cr);
16674 effect(USE labl);
16675
16676 ins_cost(300);
16677 format %{ "j$cop $labl" %}
16678 size(6);
16679 ins_encode %{
16680 Label* L = $labl$$label;
16681 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16682 %}
16683 ins_pipe(pipe_jcc);
16684 %}
16685
16686 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16687 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16688 %{
16689 match(CountedLoopEnd cop cr);
16690 effect(USE labl);
16691
16692 ins_cost(300);
16693 format %{ "j$cop $labl\t# loop end" %}
16694 size(6);
16695 ins_encode %{
16696 Label* L = $labl$$label;
16697 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16698 %}
16699 ins_pipe(pipe_jcc);
16700 %}
16701
16702 // Jump Direct Conditional - using unsigned comparison
16703 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16704 match(If cop cmp);
16705 effect(USE labl);
16706
16707 ins_cost(300);
16708 format %{ "j$cop,u $labl" %}
16709 size(6);
16710 ins_encode %{
16711 Label* L = $labl$$label;
16712 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16713 %}
16714 ins_pipe(pipe_jcc);
16715 %}
16716
16717 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16718 match(If cop cmp);
16719 effect(USE labl);
16720
16721 ins_cost(200);
16722 format %{ "j$cop,u $labl" %}
16723 size(6);
16724 ins_encode %{
16725 Label* L = $labl$$label;
16726 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16727 %}
16728 ins_pipe(pipe_jcc);
16729 %}
16730
16731 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16732 match(If cop cmp);
16733 effect(USE labl);
16734
16735 ins_cost(200);
16736 format %{ $$template
16737 if ($cop$$cmpcode == Assembler::notEqual) {
16738 $$emit$$"jp,u $labl\n\t"
16739 $$emit$$"j$cop,u $labl"
16740 } else {
16741 $$emit$$"jp,u done\n\t"
16742 $$emit$$"j$cop,u $labl\n\t"
16743 $$emit$$"done:"
16744 }
16745 %}
16746 ins_encode %{
16747 Label* l = $labl$$label;
16748 if ($cop$$cmpcode == Assembler::notEqual) {
16749 __ jcc(Assembler::parity, *l, false);
16750 __ jcc(Assembler::notEqual, *l, false);
16751 } else if ($cop$$cmpcode == Assembler::equal) {
16752 Label done;
16753 __ jccb(Assembler::parity, done);
16754 __ jcc(Assembler::equal, *l, false);
16755 __ bind(done);
16756 } else {
16757 ShouldNotReachHere();
16758 }
16759 %}
16760 ins_pipe(pipe_jcc);
16761 %}
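
// cmpOpUCF2 handles float/double compares, where an unordered result (a
// NaN operand) sets the parity flag. For a != test the unordered case must
// count as "not equal", so parity jumps straight to the target; for a ==
// test it must not match, so a short parity branch skips around the taken
// jump.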
16762
16763 // ============================================================================
// The second, slow half of a subtype check. Scan the subklass's secondary
// supers array for an instance of the superklass. Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
16769
16770 instruct partialSubtypeCheck(rdi_RegP result,
16771 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16772 rFlagsReg cr)
16773 %{
16774 match(Set result (PartialSubtypeCheck sub super));
16775 predicate(!UseSecondarySupersTable);
16776 effect(KILL rcx, KILL cr);
16777
16778 ins_cost(1100); // slightly larger than the next version
16779 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16780 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16781 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16782 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16783 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16784 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16785 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16786 "miss:\t" %}
16787
16788 ins_encode %{
16789 Label miss;
16790 // NB: Callers may assume that, when $result is a valid register,
16791 // check_klass_subtype_slow_path_linear sets it to a nonzero
16792 // value.
16793 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16794 $rcx$$Register, $result$$Register,
16795 nullptr, &miss,
16796 /*set_cond_codes:*/ true);
16797 __ xorptr($result$$Register, $result$$Register);
16798 __ bind(miss);
16799 %}
16800
16801 ins_pipe(pipe_slow);
16802 %}
16803
16804 // ============================================================================
16805 // Two versions of hashtable-based partialSubtypeCheck, both used when
16806 // we need to search for a super class in the secondary supers array.
16807 // The first is used when we don't know _a priori_ the class being
16808 // searched for. The second, far more common, is used when we do know:
16809 // this is used for instanceof, checkcast, and any case where C2 can
16810 // determine it by constant propagation.
16811
16812 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16813 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16814 rFlagsReg cr)
16815 %{
16816 match(Set result (PartialSubtypeCheck sub super));
16817 predicate(UseSecondarySupersTable);
16818 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16819
16820 ins_cost(1000);
16821 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16822
16823 ins_encode %{
16824 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16825 $temp3$$Register, $temp4$$Register, $result$$Register);
16826 %}
16827
16828 ins_pipe(pipe_slow);
16829 %}
16830
16831 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16832 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16833 rFlagsReg cr)
16834 %{
16835 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16836 predicate(UseSecondarySupersTable);
16837 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16838
16839 ins_cost(700); // smaller than the next version
16840 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16841
16842 ins_encode %{
16843 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16844 if (InlineSecondarySupersTest) {
16845 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16846 $temp3$$Register, $temp4$$Register, $result$$Register,
16847 super_klass_slot);
16848 } else {
16849 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16850 }
16851 %}
16852
16853 ins_pipe(pipe_slow);
16854 %}
16855
16856 // ============================================================================
16857 // Branch Instructions -- short offset versions
16858 //
16859 // These instructions are used to replace jumps of a long offset (the default
16860 // match) with jumps of a shorter offset. These instructions are all tagged
16861 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16862 // match rules in general matching. Instead, the ADLC generates a conversion
16863 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether a
// branch can use the short form via the is_short_branch_offset() predicate
// in the machine-specific code section of this file.
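//
// On x86 the payoff is code size: an unconditional jmp shrinks from five
// bytes (E9 rel32) to two (EB rel8) and a conditional jcc from six bytes
// (0F 8x rel32) to two (7x rel8), which is what the size(2) attributes
// below reflect relative to size(5)/size(6) on the long forms.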
16867
16868 // Jump Direct - Label defines a relative address from JMP+1
16869 instruct jmpDir_short(label labl) %{
16870 match(Goto);
16871 effect(USE labl);
16872
16873 ins_cost(300);
16874 format %{ "jmp,s $labl" %}
16875 size(2);
16876 ins_encode %{
16877 Label* L = $labl$$label;
16878 __ jmpb(*L);
16879 %}
16880 ins_pipe(pipe_jmp);
16881 ins_short_branch(1);
16882 %}
16883
16884 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16885 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16886 match(If cop cr);
16887 effect(USE labl);
16888
16889 ins_cost(300);
16890 format %{ "j$cop,s $labl" %}
16891 size(2);
16892 ins_encode %{
16893 Label* L = $labl$$label;
16894 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16895 %}
16896 ins_pipe(pipe_jcc);
16897 ins_short_branch(1);
16898 %}
16899
16900 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16901 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16902 match(CountedLoopEnd cop cr);
16903 effect(USE labl);
16904
16905 ins_cost(300);
16906 format %{ "j$cop,s $labl\t# loop end" %}
16907 size(2);
16908 ins_encode %{
16909 Label* L = $labl$$label;
16910 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16911 %}
16912 ins_pipe(pipe_jcc);
16913 ins_short_branch(1);
16914 %}
16915
16916 // Jump Direct Conditional - using unsigned comparison
16917 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16918 match(If cop cmp);
16919 effect(USE labl);
16920
16921 ins_cost(300);
16922 format %{ "j$cop,us $labl" %}
16923 size(2);
16924 ins_encode %{
16925 Label* L = $labl$$label;
16926 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16927 %}
16928 ins_pipe(pipe_jcc);
16929 ins_short_branch(1);
16930 %}
16931
16932 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16933 match(If cop cmp);
16934 effect(USE labl);
16935
16936 ins_cost(300);
16937 format %{ "j$cop,us $labl" %}
16938 size(2);
16939 ins_encode %{
16940 Label* L = $labl$$label;
16941 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16942 %}
16943 ins_pipe(pipe_jcc);
16944 ins_short_branch(1);
16945 %}
16946
16947 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16948 match(If cop cmp);
16949 effect(USE labl);
16950
16951 ins_cost(300);
16952 format %{ $$template
16953 if ($cop$$cmpcode == Assembler::notEqual) {
16954 $$emit$$"jp,u,s $labl\n\t"
16955 $$emit$$"j$cop,u,s $labl"
16956 } else {
16957 $$emit$$"jp,u,s done\n\t"
16958 $$emit$$"j$cop,u,s $labl\n\t"
16959 $$emit$$"done:"
16960 }
16961 %}
16962 size(4);
16963 ins_encode %{
16964 Label* l = $labl$$label;
16965 if ($cop$$cmpcode == Assembler::notEqual) {
16966 __ jccb(Assembler::parity, *l);
16967 __ jccb(Assembler::notEqual, *l);
16968 } else if ($cop$$cmpcode == Assembler::equal) {
16969 Label done;
16970 __ jccb(Assembler::parity, done);
16971 __ jccb(Assembler::equal, *l);
16972 __ bind(done);
16973 } else {
16974 ShouldNotReachHere();
16975 }
16976 %}
16977 ins_pipe(pipe_jcc);
16978 ins_short_branch(1);
16979 %}
16980
16981 // ============================================================================
16982 // inlined locking and unlocking
16983
16984 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16985 match(Set cr (FastLock object box));
16986 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16987 ins_cost(300);
16988 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16989 ins_encode %{
16990 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16991 %}
16992 ins_pipe(pipe_slow);
16993 %}
16994
16995 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16996 match(Set cr (FastUnlock object rax_reg));
16997 effect(TEMP tmp, USE_KILL rax_reg);
16998 ins_cost(300);
16999 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17000 ins_encode %{
17001 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17002 %}
17003 ins_pipe(pipe_slow);
17004 %}
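
// FastLock/FastUnlock produce only a condition-code result: the masm fast
// paths leave the flags set so that "equal" means success, and C2 branches
// on that either to fall through or to enter the runtime slow path.
// r15_thread is passed explicitly because r15 is reserved as the
// current-thread register in compiled code.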
17005
17006
17007 // ============================================================================
17008 // Safepoint Instructions
17009 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17010 %{
17011 match(SafePoint poll);
17012 effect(KILL cr, USE poll);
17013
17014 format %{ "testl rax, [$poll]\t"
17015 "# Safepoint: poll for GC" %}
17016 ins_cost(125);
17017 ins_encode %{
17018 __ relocate(relocInfo::poll_type);
17019 address pre_pc = __ pc();
17020 __ testl(rax, Address($poll$$Register, 0));
17021 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17022 %}
17023 ins_pipe(ialu_reg_mem);
17024 %}
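
// Safepoint polls are per-thread: $poll holds the polling-page address
// previously loaded from the current thread, and the testl either reads it
// harmlessly or faults once the VM arms the page. The poll_type relocation
// recorded just before the load lets the signal handler recognize the
// faulting pc as a safepoint poll.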
17025
17026 instruct mask_all_evexL(kReg dst, rRegL src) %{
17027 match(Set dst (MaskAll src));
17028 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17029 ins_encode %{
17030 int mask_len = Matcher::vector_length(this);
17031 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17032 %}
17033 ins_pipe( pipe_slow );
17034 %}
17035
17036 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17037 predicate(Matcher::vector_length(n) > 32);
17038 match(Set dst (MaskAll src));
17039 effect(TEMP tmp);
17040 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17041 ins_encode %{
17042 int mask_len = Matcher::vector_length(this);
17043 __ movslq($tmp$$Register, $src$$Register);
17044 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17045 %}
17046 ins_pipe( pipe_slow );
17047 %}
17048
17049 // ============================================================================
17050 // Procedure Call/Return Instructions
17051 // Call Java Static Instruction
17052 // Note: If this code changes, the corresponding ret_addr_offset() and
17053 // compute_padding() functions will have to be adjusted.
17054 instruct CallStaticJavaDirect(method meth) %{
17055 match(CallStaticJava);
17056 effect(USE meth);
17057
17058 ins_cost(300);
17059 format %{ "call,static " %}
17060 opcode(0xE8); /* E8 cd */
17061 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17062 ins_pipe(pipe_slow);
17063 ins_alignment(4);
17064 %}
17065
17066 // Call Java Dynamic Instruction
17067 // Note: If this code changes, the corresponding ret_addr_offset() and
17068 // compute_padding() functions will have to be adjusted.
17069 instruct CallDynamicJavaDirect(method meth)
17070 %{
17071 match(CallDynamicJava);
17072 effect(USE meth);
17073
17074 ins_cost(300);
17075 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17076 "call,dynamic " %}
17077 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17078 ins_pipe(pipe_slow);
17079 ins_alignment(4);
17080 %}
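
// Loading Universe::non_oop_word() into rax is part of the inline-cache
// protocol for dynamic calls: rax carries the cached class word at the
// call site, and the non-oop sentinel can never match a real receiver
// class, so the very first call falls into inline-cache resolution.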
17081
17082 // Call Runtime Instruction
17083 instruct CallRuntimeDirect(method meth)
17084 %{
17085 match(CallRuntime);
17086 effect(USE meth);
17087
17088 ins_cost(300);
17089 format %{ "call,runtime " %}
17090 ins_encode(clear_avx, Java_To_Runtime(meth));
17091 ins_pipe(pipe_slow);
17092 %}
17093
17094 // Call runtime without safepoint
17095 instruct CallLeafDirect(method meth)
17096 %{
17097 match(CallLeaf);
17098 effect(USE meth);
17099
17100 ins_cost(300);
17101 format %{ "call_leaf,runtime " %}
17102 ins_encode(clear_avx, Java_To_Runtime(meth));
17103 ins_pipe(pipe_slow);
17104 %}
17105
17106 // Call runtime without safepoint and with vector arguments
17107 instruct CallLeafDirectVector(method meth)
17108 %{
17109 match(CallLeafVector);
17110 effect(USE meth);
17111
17112 ins_cost(300);
17113 format %{ "call_leaf,vector " %}
17114 ins_encode(Java_To_Runtime(meth));
17115 ins_pipe(pipe_slow);
17116 %}
17117
17118 // Call runtime without safepoint
17119 instruct CallLeafNoFPDirect(method meth)
17120 %{
17121 match(CallLeafNoFP);
17122 effect(USE meth);
17123
17124 ins_cost(300);
17125 format %{ "call_leaf_nofp,runtime " %}
17126 ins_encode(clear_avx, Java_To_Runtime(meth));
17127 ins_pipe(pipe_slow);
17128 %}
17129
17130 // Return Instruction
17131 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17134 instruct Ret()
17135 %{
17136 match(Return);
17137
17138 format %{ "ret" %}
17139 ins_encode %{
17140 __ ret(0);
17141 %}
17142 ins_pipe(pipe_jmp);
17143 %}
17144
17145 // Tail Call; Jump from runtime stub to Java code.
17146 // Also known as an 'interprocedural jump'.
17147 // Target of jump will eventually return to caller.
17148 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller's state.
17151 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17152 %{
17153 match(TailCall jump_target method_ptr);
17154
17155 ins_cost(300);
17156 format %{ "jmp $jump_target\t# rbx holds method" %}
17157 ins_encode %{
17158 __ jmp($jump_target$$Register);
17159 %}
17160 ins_pipe(pipe_jmp);
17161 %}
17162
17163 // Tail Jump; remove the return address; jump to target.
17164 // TailCall above leaves the return address around.
17165 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17166 %{
17167 match(TailJump jump_target ex_oop);
17168
17169 ins_cost(300);
17170 format %{ "popq rdx\t# pop return address\n\t"
17171 "jmp $jump_target" %}
17172 ins_encode %{
17173 __ popq(as_Register(RDX_enc));
17174 __ jmp($jump_target$$Register);
17175 %}
17176 ins_pipe(pipe_jmp);
17177 %}
17178
17179 // Forward exception.
17180 instruct ForwardExceptionjmp()
17181 %{
17182 match(ForwardException);
17183
17184 format %{ "jmp forward_exception_stub" %}
17185 ins_encode %{
17186 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17187 %}
17188 ins_pipe(pipe_jmp);
17189 %}
17190
17191 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler. No code emitted.
17194 instruct CreateException(rax_RegP ex_oop)
17195 %{
17196 match(Set ex_oop (CreateEx));
17197
17198 size(0);
17199 // use the following format syntax
17200 format %{ "# exception oop is in rax; no code emitted" %}
17201 ins_encode();
17202 ins_pipe(empty);
17203 %}
17204
17205 // Rethrow exception:
17206 // The exception oop will come in the first argument position.
17207 // Then JUMP (not call) to the rethrow stub code.
17208 instruct RethrowException()
17209 %{
17210 match(Rethrow);
17211
17212 // use the following format syntax
17213 format %{ "jmp rethrow_stub" %}
17214 ins_encode %{
17215 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17216 %}
17217 ins_pipe(pipe_jmp);
17218 %}
17219
17220 // ============================================================================
17221 // This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
17224 instruct tlsLoadP(r15_RegP dst) %{
17225 match(Set dst (ThreadLocal));
17226 effect(DEF dst);
17227
17228 size(0);
17229 format %{ "# TLS is in R15" %}
17230 ins_encode( /*empty encoding*/ );
17231 ins_pipe(ialu_reg_reg);
17232 %}
17233
17234 instruct addF_reg(regF dst, regF src) %{
17235 predicate(UseAVX == 0);
17236 match(Set dst (AddF dst src));
17237
17238 format %{ "addss $dst, $src" %}
17239 ins_cost(150);
17240 ins_encode %{
17241 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17242 %}
17243 ins_pipe(pipe_slow);
17244 %}
17245
17246 instruct addF_mem(regF dst, memory src) %{
17247 predicate(UseAVX == 0);
17248 match(Set dst (AddF dst (LoadF src)));
17249
17250 format %{ "addss $dst, $src" %}
17251 ins_cost(150);
17252 ins_encode %{
17253 __ addss($dst$$XMMRegister, $src$$Address);
17254 %}
17255 ins_pipe(pipe_slow);
17256 %}
17257
17258 instruct addF_imm(regF dst, immF con) %{
17259 predicate(UseAVX == 0);
17260 match(Set dst (AddF dst con));
17261 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17262 ins_cost(150);
17263 ins_encode %{
17264 __ addss($dst$$XMMRegister, $constantaddress($con));
17265 %}
17266 ins_pipe(pipe_slow);
17267 %}
17268
17269 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17270 predicate(UseAVX > 0);
17271 match(Set dst (AddF src1 src2));
17272
17273 format %{ "vaddss $dst, $src1, $src2" %}
17274 ins_cost(150);
17275 ins_encode %{
17276 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17277 %}
17278 ins_pipe(pipe_slow);
17279 %}
17280
17281 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17282 predicate(UseAVX > 0);
17283 match(Set dst (AddF src1 (LoadF src2)));
17284
17285 format %{ "vaddss $dst, $src1, $src2" %}
17286 ins_cost(150);
17287 ins_encode %{
17288 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17289 %}
17290 ins_pipe(pipe_slow);
17291 %}
17292
17293 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17294 predicate(UseAVX > 0);
17295 match(Set dst (AddF src con));
17296
17297 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17298 ins_cost(150);
17299 ins_encode %{
17300 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17301 %}
17302 ins_pipe(pipe_slow);
17303 %}
17304
17305 instruct addD_reg(regD dst, regD src) %{
17306 predicate(UseAVX == 0);
17307 match(Set dst (AddD dst src));
17308
17309 format %{ "addsd $dst, $src" %}
17310 ins_cost(150);
17311 ins_encode %{
17312 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17313 %}
17314 ins_pipe(pipe_slow);
17315 %}
17316
17317 instruct addD_mem(regD dst, memory src) %{
17318 predicate(UseAVX == 0);
17319 match(Set dst (AddD dst (LoadD src)));
17320
17321 format %{ "addsd $dst, $src" %}
17322 ins_cost(150);
17323 ins_encode %{
17324 __ addsd($dst$$XMMRegister, $src$$Address);
17325 %}
17326 ins_pipe(pipe_slow);
17327 %}
17328
17329 instruct addD_imm(regD dst, immD con) %{
17330 predicate(UseAVX == 0);
17331 match(Set dst (AddD dst con));
17332 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17333 ins_cost(150);
17334 ins_encode %{
17335 __ addsd($dst$$XMMRegister, $constantaddress($con));
17336 %}
17337 ins_pipe(pipe_slow);
17338 %}
17339
17340 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17341 predicate(UseAVX > 0);
17342 match(Set dst (AddD src1 src2));
17343
17344 format %{ "vaddsd $dst, $src1, $src2" %}
17345 ins_cost(150);
17346 ins_encode %{
17347 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17348 %}
17349 ins_pipe(pipe_slow);
17350 %}
17351
17352 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17353 predicate(UseAVX > 0);
17354 match(Set dst (AddD src1 (LoadD src2)));
17355
17356 format %{ "vaddsd $dst, $src1, $src2" %}
17357 ins_cost(150);
17358 ins_encode %{
17359 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17360 %}
17361 ins_pipe(pipe_slow);
17362 %}
17363
17364 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17365 predicate(UseAVX > 0);
17366 match(Set dst (AddD src con));
17367
17368 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17369 ins_cost(150);
17370 ins_encode %{
17371 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17372 %}
17373 ins_pipe(pipe_slow);
17374 %}
17375
17376 instruct subF_reg(regF dst, regF src) %{
17377 predicate(UseAVX == 0);
17378 match(Set dst (SubF dst src));
17379
17380 format %{ "subss $dst, $src" %}
17381 ins_cost(150);
17382 ins_encode %{
17383 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17384 %}
17385 ins_pipe(pipe_slow);
17386 %}
17387
17388 instruct subF_mem(regF dst, memory src) %{
17389 predicate(UseAVX == 0);
17390 match(Set dst (SubF dst (LoadF src)));
17391
17392 format %{ "subss $dst, $src" %}
17393 ins_cost(150);
17394 ins_encode %{
17395 __ subss($dst$$XMMRegister, $src$$Address);
17396 %}
17397 ins_pipe(pipe_slow);
17398 %}
17399
17400 instruct subF_imm(regF dst, immF con) %{
17401 predicate(UseAVX == 0);
17402 match(Set dst (SubF dst con));
17403 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17404 ins_cost(150);
17405 ins_encode %{
17406 __ subss($dst$$XMMRegister, $constantaddress($con));
17407 %}
17408 ins_pipe(pipe_slow);
17409 %}
17410
17411 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17412 predicate(UseAVX > 0);
17413 match(Set dst (SubF src1 src2));
17414
17415 format %{ "vsubss $dst, $src1, $src2" %}
17416 ins_cost(150);
17417 ins_encode %{
17418 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17419 %}
17420 ins_pipe(pipe_slow);
17421 %}
17422
17423 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17424 predicate(UseAVX > 0);
17425 match(Set dst (SubF src1 (LoadF src2)));
17426
17427 format %{ "vsubss $dst, $src1, $src2" %}
17428 ins_cost(150);
17429 ins_encode %{
17430 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17431 %}
17432 ins_pipe(pipe_slow);
17433 %}
17434
17435 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17436 predicate(UseAVX > 0);
17437 match(Set dst (SubF src con));
17438
17439 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17440 ins_cost(150);
17441 ins_encode %{
17442 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17443 %}
17444 ins_pipe(pipe_slow);
17445 %}
17446
17447 instruct subD_reg(regD dst, regD src) %{
17448 predicate(UseAVX == 0);
17449 match(Set dst (SubD dst src));
17450
17451 format %{ "subsd $dst, $src" %}
17452 ins_cost(150);
17453 ins_encode %{
17454 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17455 %}
17456 ins_pipe(pipe_slow);
17457 %}
17458
17459 instruct subD_mem(regD dst, memory src) %{
17460 predicate(UseAVX == 0);
17461 match(Set dst (SubD dst (LoadD src)));
17462
17463 format %{ "subsd $dst, $src" %}
17464 ins_cost(150);
17465 ins_encode %{
17466 __ subsd($dst$$XMMRegister, $src$$Address);
17467 %}
17468 ins_pipe(pipe_slow);
17469 %}
17470
17471 instruct subD_imm(regD dst, immD con) %{
17472 predicate(UseAVX == 0);
17473 match(Set dst (SubD dst con));
17474 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17475 ins_cost(150);
17476 ins_encode %{
17477 __ subsd($dst$$XMMRegister, $constantaddress($con));
17478 %}
17479 ins_pipe(pipe_slow);
17480 %}
17481
17482 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17483 predicate(UseAVX > 0);
17484 match(Set dst (SubD src1 src2));
17485
17486 format %{ "vsubsd $dst, $src1, $src2" %}
17487 ins_cost(150);
17488 ins_encode %{
17489 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17490 %}
17491 ins_pipe(pipe_slow);
17492 %}
17493
17494 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17495 predicate(UseAVX > 0);
17496 match(Set dst (SubD src1 (LoadD src2)));
17497
17498 format %{ "vsubsd $dst, $src1, $src2" %}
17499 ins_cost(150);
17500 ins_encode %{
17501 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17502 %}
17503 ins_pipe(pipe_slow);
17504 %}
17505
17506 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17507 predicate(UseAVX > 0);
17508 match(Set dst (SubD src con));
17509
17510 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17511 ins_cost(150);
17512 ins_encode %{
17513 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17514 %}
17515 ins_pipe(pipe_slow);
17516 %}
17517
17518 instruct mulF_reg(regF dst, regF src) %{
17519 predicate(UseAVX == 0);
17520 match(Set dst (MulF dst src));
17521
17522 format %{ "mulss $dst, $src" %}
17523 ins_cost(150);
17524 ins_encode %{
17525 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17526 %}
17527 ins_pipe(pipe_slow);
17528 %}
17529
17530 instruct mulF_mem(regF dst, memory src) %{
17531 predicate(UseAVX == 0);
17532 match(Set dst (MulF dst (LoadF src)));
17533
17534 format %{ "mulss $dst, $src" %}
17535 ins_cost(150);
17536 ins_encode %{
17537 __ mulss($dst$$XMMRegister, $src$$Address);
17538 %}
17539 ins_pipe(pipe_slow);
17540 %}
17541
17542 instruct mulF_imm(regF dst, immF con) %{
17543 predicate(UseAVX == 0);
17544 match(Set dst (MulF dst con));
17545 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17546 ins_cost(150);
17547 ins_encode %{
17548 __ mulss($dst$$XMMRegister, $constantaddress($con));
17549 %}
17550 ins_pipe(pipe_slow);
17551 %}
17552
17553 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17554 predicate(UseAVX > 0);
17555 match(Set dst (MulF src1 src2));
17556
17557 format %{ "vmulss $dst, $src1, $src2" %}
17558 ins_cost(150);
17559 ins_encode %{
17560 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17561 %}
17562 ins_pipe(pipe_slow);
17563 %}
17564
17565 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17566 predicate(UseAVX > 0);
17567 match(Set dst (MulF src1 (LoadF src2)));
17568
17569 format %{ "vmulss $dst, $src1, $src2" %}
17570 ins_cost(150);
17571 ins_encode %{
17572 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17573 %}
17574 ins_pipe(pipe_slow);
17575 %}
17576
17577 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17578 predicate(UseAVX > 0);
17579 match(Set dst (MulF src con));
17580
17581 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17582 ins_cost(150);
17583 ins_encode %{
17584 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17585 %}
17586 ins_pipe(pipe_slow);
17587 %}
17588
17589 instruct mulD_reg(regD dst, regD src) %{
17590 predicate(UseAVX == 0);
17591 match(Set dst (MulD dst src));
17592
17593 format %{ "mulsd $dst, $src" %}
17594 ins_cost(150);
17595 ins_encode %{
17596 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17597 %}
17598 ins_pipe(pipe_slow);
17599 %}
17600
17601 instruct mulD_mem(regD dst, memory src) %{
17602 predicate(UseAVX == 0);
17603 match(Set dst (MulD dst (LoadD src)));
17604
17605 format %{ "mulsd $dst, $src" %}
17606 ins_cost(150);
17607 ins_encode %{
17608 __ mulsd($dst$$XMMRegister, $src$$Address);
17609 %}
17610 ins_pipe(pipe_slow);
17611 %}
17612
17613 instruct mulD_imm(regD dst, immD con) %{
17614 predicate(UseAVX == 0);
17615 match(Set dst (MulD dst con));
17616 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17617 ins_cost(150);
17618 ins_encode %{
17619 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17620 %}
17621 ins_pipe(pipe_slow);
17622 %}
17623
17624 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17625 predicate(UseAVX > 0);
17626 match(Set dst (MulD src1 src2));
17627
17628 format %{ "vmulsd $dst, $src1, $src2" %}
17629 ins_cost(150);
17630 ins_encode %{
17631 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17632 %}
17633 ins_pipe(pipe_slow);
17634 %}
17635
17636 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17637 predicate(UseAVX > 0);
17638 match(Set dst (MulD src1 (LoadD src2)));
17639
17640 format %{ "vmulsd $dst, $src1, $src2" %}
17641 ins_cost(150);
17642 ins_encode %{
17643 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17644 %}
17645 ins_pipe(pipe_slow);
17646 %}
17647
17648 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17649 predicate(UseAVX > 0);
17650 match(Set dst (MulD src con));
17651
17652 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17653 ins_cost(150);
17654 ins_encode %{
17655 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17656 %}
17657 ins_pipe(pipe_slow);
17658 %}
17659
17660 instruct divF_reg(regF dst, regF src) %{
17661 predicate(UseAVX == 0);
17662 match(Set dst (DivF dst src));
17663
17664 format %{ "divss $dst, $src" %}
17665 ins_cost(150);
17666 ins_encode %{
17667 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17668 %}
17669 ins_pipe(pipe_slow);
17670 %}
17671
17672 instruct divF_mem(regF dst, memory src) %{
17673 predicate(UseAVX == 0);
17674 match(Set dst (DivF dst (LoadF src)));
17675
17676 format %{ "divss $dst, $src" %}
17677 ins_cost(150);
17678 ins_encode %{
17679 __ divss($dst$$XMMRegister, $src$$Address);
17680 %}
17681 ins_pipe(pipe_slow);
17682 %}
17683
17684 instruct divF_imm(regF dst, immF con) %{
17685 predicate(UseAVX == 0);
17686 match(Set dst (DivF dst con));
17687 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17688 ins_cost(150);
17689 ins_encode %{
17690 __ divss($dst$$XMMRegister, $constantaddress($con));
17691 %}
17692 ins_pipe(pipe_slow);
17693 %}
17694
17695 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17696 predicate(UseAVX > 0);
17697 match(Set dst (DivF src1 src2));
17698
17699 format %{ "vdivss $dst, $src1, $src2" %}
17700 ins_cost(150);
17701 ins_encode %{
17702 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17703 %}
17704 ins_pipe(pipe_slow);
17705 %}
17706
17707 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17708 predicate(UseAVX > 0);
17709 match(Set dst (DivF src1 (LoadF src2)));
17710
17711 format %{ "vdivss $dst, $src1, $src2" %}
17712 ins_cost(150);
17713 ins_encode %{
17714 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17715 %}
17716 ins_pipe(pipe_slow);
17717 %}
17718
17719 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17720 predicate(UseAVX > 0);
17721 match(Set dst (DivF src con));
17722
17723 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17724 ins_cost(150);
17725 ins_encode %{
17726 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17727 %}
17728 ins_pipe(pipe_slow);
17729 %}
17730
17731 instruct divD_reg(regD dst, regD src) %{
17732 predicate(UseAVX == 0);
17733 match(Set dst (DivD dst src));
17734
17735 format %{ "divsd $dst, $src" %}
17736 ins_cost(150);
17737 ins_encode %{
17738 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17739 %}
17740 ins_pipe(pipe_slow);
17741 %}
17742
17743 instruct divD_mem(regD dst, memory src) %{
17744 predicate(UseAVX == 0);
17745 match(Set dst (DivD dst (LoadD src)));
17746
17747 format %{ "divsd $dst, $src" %}
17748 ins_cost(150);
17749 ins_encode %{
17750 __ divsd($dst$$XMMRegister, $src$$Address);
17751 %}
17752 ins_pipe(pipe_slow);
17753 %}
17754
17755 instruct divD_imm(regD dst, immD con) %{
17756 predicate(UseAVX == 0);
17757 match(Set dst (DivD dst con));
17758 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17759 ins_cost(150);
17760 ins_encode %{
17761 __ divsd($dst$$XMMRegister, $constantaddress($con));
17762 %}
17763 ins_pipe(pipe_slow);
17764 %}
17765
17766 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17767 predicate(UseAVX > 0);
17768 match(Set dst (DivD src1 src2));
17769
17770 format %{ "vdivsd $dst, $src1, $src2" %}
17771 ins_cost(150);
17772 ins_encode %{
17773 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17774 %}
17775 ins_pipe(pipe_slow);
17776 %}
17777
17778 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17779 predicate(UseAVX > 0);
17780 match(Set dst (DivD src1 (LoadD src2)));
17781
17782 format %{ "vdivsd $dst, $src1, $src2" %}
17783 ins_cost(150);
17784 ins_encode %{
17785 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17786 %}
17787 ins_pipe(pipe_slow);
17788 %}
17789
17790 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17791 predicate(UseAVX > 0);
17792 match(Set dst (DivD src con));
17793
17794 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17795 ins_cost(150);
17796 ins_encode %{
17797 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17798 %}
17799 ins_pipe(pipe_slow);
17800 %}
17801
17802 instruct absF_reg(regF dst) %{
17803 predicate(UseAVX == 0);
17804 match(Set dst (AbsF dst));
17805 ins_cost(150);
17806 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17807 ins_encode %{
17808 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17809 %}
17810 ins_pipe(pipe_slow);
17811 %}
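// The sign-masking constant works because IEEE-754 keeps the sign in the top
// bit, e.g. for abs(-1.5f):
//   -1.5f = 0xbfc00000;  0xbfc00000 & 0x7fffffff = 0x3fc00000 = 1.5f
// The neg rules further below use the complementary constant 0x80000000 with
// xorps to flip just that bit.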
17812
17813 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17814 predicate(UseAVX > 0);
17815 match(Set dst (AbsF src));
17816 ins_cost(150);
17817 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17818 ins_encode %{
17819 int vlen_enc = Assembler::AVX_128bit;
17820 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17821 ExternalAddress(float_signmask()), vlen_enc);
17822 %}
17823 ins_pipe(pipe_slow);
17824 %}
17825
17826 instruct absD_reg(regD dst) %{
17827 predicate(UseAVX == 0);
17828 match(Set dst (AbsD dst));
17829 ins_cost(150);
17830 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17831 "# abs double by sign masking" %}
17832 ins_encode %{
17833 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17834 %}
17835 ins_pipe(pipe_slow);
17836 %}
17837
17838 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17839 predicate(UseAVX > 0);
17840 match(Set dst (AbsD src));
17841 ins_cost(150);
17842 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17843 "# abs double by sign masking" %}
17844 ins_encode %{
17845 int vlen_enc = Assembler::AVX_128bit;
17846 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17847 ExternalAddress(double_signmask()), vlen_enc);
17848 %}
17849 ins_pipe(pipe_slow);
17850 %}
17851
17852 instruct negF_reg(regF dst) %{
17853 predicate(UseAVX == 0);
17854 match(Set dst (NegF dst));
17855 ins_cost(150);
17856 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17857 ins_encode %{
17858 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17859 %}
17860 ins_pipe(pipe_slow);
17861 %}
17862
17863 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17864 predicate(UseAVX > 0);
17865 match(Set dst (NegF src));
17866 ins_cost(150);
17867 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17868 ins_encode %{
17869 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17870 ExternalAddress(float_signflip()));
17871 %}
17872 ins_pipe(pipe_slow);
17873 %}
17874
17875 instruct negD_reg(regD dst) %{
17876 predicate(UseAVX == 0);
17877 match(Set dst (NegD dst));
17878 ins_cost(150);
17879 format %{ "xorpd $dst, [0x8000000000000000]\t"
17880 "# neg double by sign flipping" %}
17881 ins_encode %{
17882 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17883 %}
17884 ins_pipe(pipe_slow);
17885 %}
17886
17887 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17888 predicate(UseAVX > 0);
17889 match(Set dst (NegD src));
17890 ins_cost(150);
17891 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17892 "# neg double by sign flipping" %}
17893 ins_encode %{
17894 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17895 ExternalAddress(double_signflip()));
17896 %}
17897 ins_pipe(pipe_slow);
17898 %}
17899
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance; therefore only the rule where the input is pre-loaded
// into the dst register is defined below.
17902 instruct sqrtF_reg(regF dst) %{
17903 match(Set dst (SqrtF dst));
17904 format %{ "sqrtss $dst, $dst" %}
17905 ins_encode %{
17906 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17907 %}
17908 ins_pipe(pipe_slow);
17909 %}
17910
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance; therefore only the rule where the input is pre-loaded
// into the dst register is defined below.
17913 instruct sqrtD_reg(regD dst) %{
17914 match(Set dst (SqrtD dst));
17915 format %{ "sqrtsd $dst, $dst" %}
17916 ins_encode %{
17917 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17918 %}
17919 ins_pipe(pipe_slow);
17920 %}
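// Rationale for the dst==src restriction: sqrtss/sqrtsd write only the low
// lanes of dst, so they carry a false dependence on dst's previous contents.
// By matching only "sqrtsd xmm, xmm" the input naturally pre-loads dst; a
// two-operand form would want something like "xorps dst, dst" first to break
// the partial-register dependence.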
17921
17922 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17923 effect(TEMP tmp);
17924 match(Set dst (ConvF2HF src));
17925 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17927 ins_encode %{
17928 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17929 %}
17930 ins_pipe( pipe_slow );
17931 %}
17932
17933 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17934 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17935 effect(TEMP ktmp, TEMP rtmp);
17936 match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17938 ins_encode %{
17939 __ movl($rtmp$$Register, 0x1);
17940 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17941 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17942 %}
17943 ins_pipe( pipe_slow );
17944 %}
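// The 0x1 opmask above restricts the merge-masked evcvtps2ph store to element
// 0, so only one half-float (2 bytes) is written to $mem rather than the full
// 8-byte result of a 128-bit conversion; the 0x04 immediate defers rounding
// to MXCSR, matching the scalar path.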
17945
17946 instruct vconvF2HF(vec dst, vec src) %{
17947 match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
17949 ins_encode %{
17950 int vlen_enc = vector_length_encoding(this, $src);
17951 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17952 %}
17953 ins_pipe( pipe_slow );
17954 %}
17955
17956 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17957 predicate(n->as_StoreVector()->memory_size() >= 16);
17958 match(Set mem (StoreVector mem (VectorCastF2HF src)));
17959 format %{ "vcvtps2ph $mem,$src" %}
17960 ins_encode %{
17961 int vlen_enc = vector_length_encoding(this, $src);
17962 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17963 %}
17964 ins_pipe( pipe_slow );
17965 %}
17966
17967 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17968 match(Set dst (ConvHF2F src));
17969 format %{ "vcvtph2ps $dst,$src" %}
17970 ins_encode %{
17971 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17972 %}
17973 ins_pipe( pipe_slow );
17974 %}
17975
17976 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17977 match(Set dst (VectorCastHF2F (LoadVector mem)));
17978 format %{ "vcvtph2ps $dst,$mem" %}
17979 ins_encode %{
17980 int vlen_enc = vector_length_encoding(this);
17981 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17982 %}
17983 ins_pipe( pipe_slow );
17984 %}
17985
17986 instruct vconvHF2F(vec dst, vec src) %{
17987 match(Set dst (VectorCastHF2F src));
17988 ins_cost(125);
17989 format %{ "vector_conv_HF2F $dst,$src" %}
17990 ins_encode %{
17991 int vlen_enc = vector_length_encoding(this);
17992 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17993 %}
17994 ins_pipe( pipe_slow );
17995 %}
17996
17997 // ---------------------------------------- VectorReinterpret ------------------------------------
17998 instruct reinterpret_mask(kReg dst) %{
17999 predicate(n->bottom_type()->isa_vectmask() &&
18000 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18001 match(Set dst (VectorReinterpret dst));
18002 ins_cost(125);
18003 format %{ "vector_reinterpret $dst\t!" %}
18004 ins_encode %{
18005 // empty
18006 %}
18007 ins_pipe( pipe_slow );
18008 %}
18009
18010 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18011 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18012 n->bottom_type()->isa_vectmask() &&
18013 n->in(1)->bottom_type()->isa_vectmask() &&
18014 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src sizes match in bytes
18016 match(Set dst (VectorReinterpret src));
18017 effect(TEMP xtmp);
18018 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18019 ins_encode %{
18020 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18021 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18023 int vlen_enc = vector_length_encoding(src_sz);
18024 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18025 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18026 %}
18027 ins_pipe( pipe_slow );
18028 %}
18029
18030 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18031 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18032 n->bottom_type()->isa_vectmask() &&
18033 n->in(1)->bottom_type()->isa_vectmask() &&
18034 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18035 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src sizes match in bytes
18037 match(Set dst (VectorReinterpret src));
18038 effect(TEMP xtmp);
18039 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18040 ins_encode %{
18041 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18042 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18044 int vlen_enc = vector_length_encoding(src_sz);
18045 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18046 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18047 %}
18048 ins_pipe( pipe_slow );
18049 %}
18050
18051 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18052 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18053 n->bottom_type()->isa_vectmask() &&
18054 n->in(1)->bottom_type()->isa_vectmask() &&
18055 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18056 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src sizes match in bytes
18058 match(Set dst (VectorReinterpret src));
18059 effect(TEMP xtmp);
18060 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18061 ins_encode %{
18062 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18063 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
18065 int vlen_enc = vector_length_encoding(src_sz);
18066 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18067 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18068 %}
18069 ins_pipe( pipe_slow );
18070 %}
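// The three W2B/D2B/Q2B rules above share one idiom: expand the opmask into a
// vector at the source element width (evpmovm2w/d/q yields all-ones lanes for
// set mask bits), then recompress at byte granularity with evpmovb2m. Because
// source and destination masks cover the same number of payload bytes (see
// the asserts), the byte-level view is exactly the reinterpreted mask.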
18071
18072 instruct reinterpret(vec dst) %{
18073 predicate(!n->bottom_type()->isa_vectmask() &&
18074 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18075 match(Set dst (VectorReinterpret dst));
18076 ins_cost(125);
18077 format %{ "vector_reinterpret $dst\t!" %}
18078 ins_encode %{
18079 // empty
18080 %}
18081 ins_pipe( pipe_slow );
18082 %}
18083
18084 instruct reinterpret_expand(vec dst, vec src) %{
18085 predicate(UseAVX == 0 &&
18086 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18087 match(Set dst (VectorReinterpret src));
18088 ins_cost(125);
18089 effect(TEMP dst);
18090 format %{ "vector_reinterpret_expand $dst,$src" %}
18091 ins_encode %{
18092 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18093 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18094
18095 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18096 if (src_vlen_in_bytes == 4) {
18097 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18098 } else {
18099 assert(src_vlen_in_bytes == 8, "");
18100 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18101 }
18102 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18103 %}
18104 ins_pipe( pipe_slow );
18105 %}
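// How the expand works: dst is first loaded with a constant whose low 4 or 8
// bytes are all ones and whose remaining bytes are zero; "pand dst, src" then
// copies the live bytes of src and zeroes everything above them, e.g. for an
// 8-byte src in a 16-byte dst:
//   dst = src & 0x00000000_00000000_ffffffff_ffffffff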
18106
18107 instruct vreinterpret_expand4(legVec dst, vec src) %{
18108 predicate(UseAVX > 0 &&
18109 !n->bottom_type()->isa_vectmask() &&
18110 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18111 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18112 match(Set dst (VectorReinterpret src));
18113 ins_cost(125);
18114 format %{ "vector_reinterpret_expand $dst,$src" %}
18115 ins_encode %{
18116 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18117 %}
18118 ins_pipe( pipe_slow );
18119 %}
18120
18121
18122 instruct vreinterpret_expand(legVec dst, vec src) %{
18123 predicate(UseAVX > 0 &&
18124 !n->bottom_type()->isa_vectmask() &&
18125 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18126 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18127 match(Set dst (VectorReinterpret src));
18128 ins_cost(125);
18129 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18130 ins_encode %{
18131 switch (Matcher::vector_length_in_bytes(this, $src)) {
18132 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18133 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18134 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18135 default: ShouldNotReachHere();
18136 }
18137 %}
18138 ins_pipe( pipe_slow );
18139 %}
18140
18141 instruct reinterpret_shrink(vec dst, legVec src) %{
18142 predicate(!n->bottom_type()->isa_vectmask() &&
18143 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18144 match(Set dst (VectorReinterpret src));
18145 ins_cost(125);
18146 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18147 ins_encode %{
18148 switch (Matcher::vector_length_in_bytes(this)) {
18149 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18150 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18151 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18152 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18153 default: ShouldNotReachHere();
18154 }
18155 %}
18156 ins_pipe( pipe_slow );
18157 %}
18158
18159 // ----------------------------------------------------------------------------------------------------
18160
18161 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18162 match(Set dst (RoundDoubleMode src rmode));
18163 format %{ "roundsd $dst,$src" %}
18164 ins_cost(150);
18165 ins_encode %{
18166 assert(UseSSE >= 4, "required");
18167 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18168 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18169 }
18170 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18171 %}
18172 ins_pipe(pipe_slow);
18173 %}
18174
18175 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18176 match(Set dst (RoundDoubleMode con rmode));
18177 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18178 ins_cost(150);
18179 ins_encode %{
18180 assert(UseSSE >= 4, "required");
18181 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18182 %}
18183 ins_pipe(pipe_slow);
18184 %}
18185
18186 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18187 predicate(Matcher::vector_length(n) < 8);
18188 match(Set dst (RoundDoubleModeV src rmode));
18189 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18190 ins_encode %{
18191 assert(UseAVX > 0, "required");
18192 int vlen_enc = vector_length_encoding(this);
18193 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18194 %}
18195 ins_pipe( pipe_slow );
18196 %}
18197
18198 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18199 predicate(Matcher::vector_length(n) == 8);
18200 match(Set dst (RoundDoubleModeV src rmode));
18201 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18202 ins_encode %{
18203 assert(UseAVX > 2, "required");
18204 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18205 %}
18206 ins_pipe( pipe_slow );
18207 %}
18208
18209 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18210 predicate(Matcher::vector_length(n) < 8);
18211 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18212 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18213 ins_encode %{
18214 assert(UseAVX > 0, "required");
18215 int vlen_enc = vector_length_encoding(this);
18216 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18217 %}
18218 ins_pipe( pipe_slow );
18219 %}
18220
18221 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18222 predicate(Matcher::vector_length(n) == 8);
18223 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18224 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18225 ins_encode %{
18226 assert(UseAVX > 2, "required");
18227 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18228 %}
18229 ins_pipe( pipe_slow );
18230 %}
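// In the rounding rules above the $rmode constant is passed straight through
// as the x86 rounding immediate; the SSE4.1/AVX encoding (0x0 =
// round-to-nearest-even, 0x1 = floor, 0x2 = ceil) lines up with the mode
// values supplied by RoundDoubleMode/RoundDoubleModeV, so no translation is
// needed. Vectors of 8 doubles need the EVEX vrndscalepd form, since vroundpd
// tops out at 256 bits.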
18231
18232 instruct onspinwait() %{
18233 match(OnSpinWait);
18234 ins_cost(200);
18235
18236 format %{
18237 $$template
18238 $$emit$$"pause\t! membar_onspinwait"
18239 %}
18240 ins_encode %{
18241 __ pause();
18242 %}
18243 ins_pipe(pipe_slow);
18244 %}
18245
18246 // a * b + c
18247 instruct fmaD_reg(regD a, regD b, regD c) %{
18248 match(Set c (FmaD c (Binary a b)));
18249 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18250 ins_cost(150);
18251 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18253 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18254 %}
18255 ins_pipe( pipe_slow );
18256 %}
18257
18258 // a * b + c
18259 instruct fmaF_reg(regF a, regF b, regF c) %{
18260 match(Set c (FmaF c (Binary a b)));
18261 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18262 ins_cost(150);
18263 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18265 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18266 %}
18267 ins_pipe( pipe_slow );
18268 %}
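// Both FMA rules insist on UseFMA because FmaF/FmaD promise a fused
// multiply-add with a single rounding step; lowering to separate mul and add
// instructions would round twice and could change results, so there is
// deliberately no non-FMA fallback here.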
18269
18270 // ====================VECTOR INSTRUCTIONS=====================================
18271
18272 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18273 instruct MoveVec2Leg(legVec dst, vec src) %{
18274 match(Set dst src);
18275 format %{ "" %}
18276 ins_encode %{
18277 ShouldNotReachHere();
18278 %}
18279 ins_pipe( fpu_reg_reg );
18280 %}
18281
18282 instruct MoveLeg2Vec(vec dst, legVec src) %{
18283 match(Set dst src);
18284 format %{ "" %}
18285 ins_encode %{
18286 ShouldNotReachHere();
18287 %}
18288 ins_pipe( fpu_reg_reg );
18289 %}
18290
18291 // ============================================================================
18292
18293 // Load vectors generic operand pattern
18294 instruct loadV(vec dst, memory mem) %{
18295 match(Set dst (LoadVector mem));
18296 ins_cost(125);
18297 format %{ "load_vector $dst,$mem" %}
18298 ins_encode %{
18299 BasicType bt = Matcher::vector_element_basic_type(this);
18300 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18301 %}
18302 ins_pipe( pipe_slow );
18303 %}
18304
18305 // Store vectors generic operand pattern.
18306 instruct storeV(memory mem, vec src) %{
18307 match(Set mem (StoreVector mem src));
18308 ins_cost(145);
18309 format %{ "store_vector $mem,$src\n\t" %}
18310 ins_encode %{
18311 switch (Matcher::vector_length_in_bytes(this, $src)) {
18312 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18313 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18314 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18315 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18316 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18317 default: ShouldNotReachHere();
18318 }
18319 %}
18320 ins_pipe( pipe_slow );
18321 %}
18322
18323 // ---------------------------------------- Gather ------------------------------------
18324
18325 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18326
18327 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18328 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18329 Matcher::vector_length_in_bytes(n) <= 32);
18330 match(Set dst (LoadVectorGather mem idx));
18331 effect(TEMP dst, TEMP tmp, TEMP mask);
18332 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18333 ins_encode %{
18334 int vlen_enc = vector_length_encoding(this);
18335 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18336 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18337 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18338 __ lea($tmp$$Register, $mem$$Address);
18339 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18340 %}
18341 ins_pipe( pipe_slow );
18342 %}
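// Why the all-ones mask: AVX2 vpgather* has no unmasked form - an element is
// loaded only if the sign bit of the corresponding mask element is set, and
// the instruction consumes (clears) the mask as it completes, which is why
// $mask is a TEMP. "vpcmpeqd x, x, x" is the usual idiom for materializing
// an all-ones "gather everything" mask.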
18343
18344
18345 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18346 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18347 !is_subword_type(Matcher::vector_element_basic_type(n)));
18348 match(Set dst (LoadVectorGather mem idx));
18349 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18351 ins_encode %{
18352 int vlen_enc = vector_length_encoding(this);
18353 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18354 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18355 __ lea($tmp$$Register, $mem$$Address);
18356 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18357 %}
18358 ins_pipe( pipe_slow );
18359 %}
18360
18361 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18362 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18363 !is_subword_type(Matcher::vector_element_basic_type(n)));
18364 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18365 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18367 ins_encode %{
18368 assert(UseAVX > 2, "sanity");
18369 int vlen_enc = vector_length_encoding(this);
18370 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18371 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary.
18374 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18375 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18376 __ lea($tmp$$Register, $mem$$Address);
18377 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18378 %}
18379 ins_pipe( pipe_slow );
18380 %}
18381
18382 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18383 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18384 match(Set dst (LoadVectorGather mem idx_base));
18385 effect(TEMP tmp, TEMP rtmp);
18386 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18387 ins_encode %{
18388 int vlen_enc = vector_length_encoding(this);
18389 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18390 __ lea($tmp$$Register, $mem$$Address);
18391 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18392 %}
18393 ins_pipe( pipe_slow );
18394 %}
18395
18396 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18397 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18398 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18399 match(Set dst (LoadVectorGather mem idx_base));
18400 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18401 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18402 ins_encode %{
18403 int vlen_enc = vector_length_encoding(this);
18404 int vector_len = Matcher::vector_length(this);
18405 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18406 __ lea($tmp$$Register, $mem$$Address);
18407 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18408 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18409 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18410 %}
18411 ins_pipe( pipe_slow );
18412 %}
18413
18414 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18415 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18416 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18417 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18418 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18419 ins_encode %{
18420 int vlen_enc = vector_length_encoding(this);
18421 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18422 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18423 __ lea($tmp$$Register, $mem$$Address);
18424 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18425 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18426 %}
18427 ins_pipe( pipe_slow );
18428 %}
18429
18430 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18431 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18432 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18433 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18434 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18435 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18436 ins_encode %{
18437 int vlen_enc = vector_length_encoding(this);
18438 int vector_len = Matcher::vector_length(this);
18439 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18440 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18441 __ lea($tmp$$Register, $mem$$Address);
18442 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18443 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18444 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18445 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18446 %}
18447 ins_pipe( pipe_slow );
18448 %}
18449
18450 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18451 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18452 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18453 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18454 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18455 ins_encode %{
18456 int vlen_enc = vector_length_encoding(this);
18457 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18458 __ lea($tmp$$Register, $mem$$Address);
18459 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18460 if (elem_bt == T_SHORT) {
18461 __ movl($mask_idx$$Register, 0x55555555);
18462 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18463 }
18464 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18465 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18466 %}
18467 ins_pipe( pipe_slow );
18468 %}
18469
18470 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18471 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18472 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18473 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18474 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18475 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18476 ins_encode %{
18477 int vlen_enc = vector_length_encoding(this);
18478 int vector_len = Matcher::vector_length(this);
18479 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18480 __ lea($tmp$$Register, $mem$$Address);
18481 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18482 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18483 if (elem_bt == T_SHORT) {
18484 __ movl($mask_idx$$Register, 0x55555555);
18485 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18486 }
18487 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18488 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18489 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18490 %}
18491 ins_pipe( pipe_slow );
18492 %}
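// The pext step in the two AVX2 rules above compensates for vpmovmskb
// producing one bit per *byte*: a T_SHORT lane spans two bytes whose sign
// bits agree, so extracting every even bit with the 0x55555555 selector
// yields one bit per short, e.g. for four shorts with lanes 0 and 3 active:
//   byte mask 0b11000011 --pext 0x55--> 0b1001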
18493
18494 // ====================Scatter=======================================
18495
18496 // Scatter INT, LONG, FLOAT, DOUBLE
18497
18498 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18499 predicate(UseAVX > 2);
18500 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18501 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18503 ins_encode %{
18504 int vlen_enc = vector_length_encoding(this, $src);
18505 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18506
18507 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18508 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18509
18510 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18511 __ lea($tmp$$Register, $mem$$Address);
18512 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18513 %}
18514 ins_pipe( pipe_slow );
18515 %}
18516
18517 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18518 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18519 effect(TEMP tmp, TEMP ktmp);
18520 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18521 ins_encode %{
18522 int vlen_enc = vector_length_encoding(this, $src);
18523 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18524 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18525 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary.
18528 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18529 __ lea($tmp$$Register, $mem$$Address);
18530 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18531 %}
18532 ins_pipe( pipe_slow );
18533 %}
18534
18535 // ====================REPLICATE=======================================
18536
18537 // Replicate byte scalar to be vector
18538 instruct vReplB_reg(vec dst, rRegI src) %{
18539 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18540 match(Set dst (Replicate src));
18541 format %{ "replicateB $dst,$src" %}
18542 ins_encode %{
18543 uint vlen = Matcher::vector_length(this);
18544 if (UseAVX >= 2) {
18545 int vlen_enc = vector_length_encoding(this);
18546 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18547 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18548 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18549 } else {
18550 __ movdl($dst$$XMMRegister, $src$$Register);
18551 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18552 }
18553 } else {
18554 assert(UseAVX < 2, "");
18555 __ movdl($dst$$XMMRegister, $src$$Register);
18556 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18557 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18558 if (vlen >= 16) {
18559 assert(vlen == 16, "");
18560 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18561 }
18562 }
18563 %}
18564 ins_pipe( pipe_slow );
18565 %}
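// For reference, the pre-AVX2 fallback above builds the byte broadcast in
// stages, roughly:
//   movdl      xmm, src         ; b in lane 0
//   punpcklbw  xmm, xmm         ; bb in the low word
//   pshuflw    xmm, xmm, 0x00   ; bb bb bb bb in the low qword
//   punpcklqdq xmm, xmm         ; duplicated into the high qword (16B vectors)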
18566
18567 instruct ReplB_mem(vec dst, memory mem) %{
18568 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18569 match(Set dst (Replicate (LoadB mem)));
18570 format %{ "replicateB $dst,$mem" %}
18571 ins_encode %{
18572 int vlen_enc = vector_length_encoding(this);
18573 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18574 %}
18575 ins_pipe( pipe_slow );
18576 %}
18577
18578 // ====================ReplicateS=======================================
18579
18580 instruct vReplS_reg(vec dst, rRegI src) %{
18581 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18582 match(Set dst (Replicate src));
18583 format %{ "replicateS $dst,$src" %}
18584 ins_encode %{
18585 uint vlen = Matcher::vector_length(this);
18586 int vlen_enc = vector_length_encoding(this);
18587 if (UseAVX >= 2) {
18588 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18589 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18590 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18591 } else {
18592 __ movdl($dst$$XMMRegister, $src$$Register);
18593 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18594 }
18595 } else {
18596 assert(UseAVX < 2, "");
18597 __ movdl($dst$$XMMRegister, $src$$Register);
18598 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18599 if (vlen >= 8) {
18600 assert(vlen == 8, "");
18601 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18602 }
18603 }
18604 %}
18605 ins_pipe( pipe_slow );
18606 %}
18607
18608 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18609 match(Set dst (Replicate con));
18610 effect(TEMP rtmp);
18611 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18612 ins_encode %{
18613 int vlen_enc = vector_length_encoding(this);
18614 BasicType bt = Matcher::vector_element_basic_type(this);
18615 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18616 __ movl($rtmp$$Register, $con$$constant);
18617 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18618 %}
18619 ins_pipe( pipe_slow );
18620 %}
18621
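// Replicate half-float scalar to be vector (AVX512_FP16)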
18622 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18623 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18624 match(Set dst (Replicate src));
18625 effect(TEMP rtmp);
18626 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18627 ins_encode %{
18628 int vlen_enc = vector_length_encoding(this);
18629 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18630 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18631 %}
18632 ins_pipe( pipe_slow );
18633 %}
18634
18635 instruct ReplS_mem(vec dst, memory mem) %{
18636 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18637 match(Set dst (Replicate (LoadS mem)));
18638 format %{ "replicateS $dst,$mem" %}
18639 ins_encode %{
18640 int vlen_enc = vector_length_encoding(this);
18641 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18642 %}
18643 ins_pipe( pipe_slow );
18644 %}
18645
18646 // ====================ReplicateI=======================================
18647
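// Replicate int scalar to be vector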
18648 instruct ReplI_reg(vec dst, rRegI src) %{
18649 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18650 match(Set dst (Replicate src));
18651 format %{ "replicateI $dst,$src" %}
18652 ins_encode %{
18653 uint vlen = Matcher::vector_length(this);
18654 int vlen_enc = vector_length_encoding(this);
18655 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18656 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18657 } else if (VM_Version::supports_avx2()) {
18658 __ movdl($dst$$XMMRegister, $src$$Register);
18659 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18660 } else {
18661 __ movdl($dst$$XMMRegister, $src$$Register);
18662 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18663 }
18664 %}
18665 ins_pipe( pipe_slow );
18666 %}
18667
18668 instruct ReplI_mem(vec dst, memory mem) %{
18669 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18670 match(Set dst (Replicate (LoadI mem)));
18671 format %{ "replicateI $dst,$mem" %}
18672 ins_encode %{
18673 int vlen_enc = vector_length_encoding(this);
18674 if (VM_Version::supports_avx2()) {
18675 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18676 } else if (VM_Version::supports_avx()) {
18677 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18678 } else {
18679 __ movdl($dst$$XMMRegister, $mem$$Address);
18680 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18681 }
18682 %}
18683 ins_pipe( pipe_slow );
18684 %}
18685
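// Replicate non-long integral scalar immediate to be vector by loading from const table.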
18686 instruct ReplI_imm(vec dst, immI con) %{
18687 predicate(Matcher::is_non_long_integral_vector(n));
18688 match(Set dst (Replicate con));
18689 format %{ "replicateI $dst,$con" %}
18690 ins_encode %{
18691 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18692 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18693 type2aelembytes(Matcher::vector_element_basic_type(this))));
18694 BasicType bt = Matcher::vector_element_basic_type(this);
18695 int vlen = Matcher::vector_length_in_bytes(this);
18696 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18697 %}
18698 ins_pipe( pipe_slow );
18699 %}
18700
18701 // Replicate scalar zero to be vector
18702 instruct ReplI_zero(vec dst, immI_0 zero) %{
18703 predicate(Matcher::is_non_long_integral_vector(n));
18704 match(Set dst (Replicate zero));
18705 format %{ "replicateI $dst,$zero" %}
18706 ins_encode %{
18707 int vlen_enc = vector_length_encoding(this);
18708 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18709 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18710 } else {
18711 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18712 }
18713 %}
18714 ins_pipe( fpu_reg_reg );
18715 %}
18716
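// Replicate scalar -1 (all bits set) to be vector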
18717 instruct ReplI_M1(vec dst, immI_M1 con) %{
18718 predicate(Matcher::is_non_long_integral_vector(n));
18719 match(Set dst (Replicate con));
18720 format %{ "vallones $dst" %}
18721 ins_encode %{
18722 int vector_len = vector_length_encoding(this);
18723 __ vallones($dst$$XMMRegister, vector_len);
18724 %}
18725 ins_pipe( pipe_slow );
18726 %}
18727
18728 // ====================ReplicateL=======================================
18729
18730 // Replicate long (8 byte) scalar to be vector
18731 instruct ReplL_reg(vec dst, rRegL src) %{
18732 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18733 match(Set dst (Replicate src));
18734 format %{ "replicateL $dst,$src" %}
18735 ins_encode %{
18736 int vlen = Matcher::vector_length(this);
18737 int vlen_enc = vector_length_encoding(this);
18738 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18739 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18740 } else if (VM_Version::supports_avx2()) {
18741 __ movdq($dst$$XMMRegister, $src$$Register);
18742 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18743 } else {
18744 __ movdq($dst$$XMMRegister, $src$$Register);
18745 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18746 }
18747 %}
18748 ins_pipe( pipe_slow );
18749 %}
18750
18751 instruct ReplL_mem(vec dst, memory mem) %{
18752 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18753 match(Set dst (Replicate (LoadL mem)));
18754 format %{ "replicateL $dst,$mem" %}
18755 ins_encode %{
18756 int vlen_enc = vector_length_encoding(this);
18757 if (VM_Version::supports_avx2()) {
18758 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18759 } else if (VM_Version::supports_sse3()) {
18760 __ movddup($dst$$XMMRegister, $mem$$Address);
18761 } else {
18762 __ movq($dst$$XMMRegister, $mem$$Address);
18763 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18764 }
18765 %}
18766 ins_pipe( pipe_slow );
18767 %}
18768
18769 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18770 instruct ReplL_imm(vec dst, immL con) %{
18771 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18772 match(Set dst (Replicate con));
18773 format %{ "replicateL $dst,$con" %}
18774 ins_encode %{
18775 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18776 int vlen = Matcher::vector_length_in_bytes(this);
18777 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18778 %}
18779 ins_pipe( pipe_slow );
18780 %}
18781
18782 instruct ReplL_zero(vec dst, immL0 zero) %{
18783 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18784 match(Set dst (Replicate zero));
18785 format %{ "replicateL $dst,$zero" %}
18786 ins_encode %{
18787 int vlen_enc = vector_length_encoding(this);
18788 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18789 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18790 } else {
18791 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18792 }
18793 %}
18794 ins_pipe( fpu_reg_reg );
18795 %}
18796
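// Replicate long -1 (all bits set) to be vector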
18797 instruct ReplL_M1(vec dst, immL_M1 con) %{
18798 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18799 match(Set dst (Replicate con));
18800 format %{ "vallones $dst" %}
18801 ins_encode %{
18802 int vector_len = vector_length_encoding(this);
18803 __ vallones($dst$$XMMRegister, vector_len);
18804 %}
18805 ins_pipe( pipe_slow );
18806 %}
18807
18808 // ====================ReplicateF=======================================
18809
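// Replicate float scalar to be vector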
18810 instruct vReplF_reg(vec dst, vlRegF src) %{
18811 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18812 match(Set dst (Replicate src));
18813 format %{ "replicateF $dst,$src" %}
18814 ins_encode %{
18815 uint vlen = Matcher::vector_length(this);
18816 int vlen_enc = vector_length_encoding(this);
18817 if (vlen <= 4) {
18818 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18819 } else if (VM_Version::supports_avx2()) {
18820 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18821 } else {
18822 assert(vlen == 8, "sanity");
18823 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18824 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18825 }
18826 %}
18827 ins_pipe( pipe_slow );
18828 %}
18829
18830 instruct ReplF_reg(vec dst, vlRegF src) %{
18831 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18832 match(Set dst (Replicate src));
18833 format %{ "replicateF $dst,$src" %}
18834 ins_encode %{
18835 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18836 %}
18837 ins_pipe( pipe_slow );
18838 %}
18839
18840 instruct ReplF_mem(vec dst, memory mem) %{
18841 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18842 match(Set dst (Replicate (LoadF mem)));
18843 format %{ "replicateF $dst,$mem" %}
18844 ins_encode %{
18845 int vlen_enc = vector_length_encoding(this);
18846 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18847 %}
18848 ins_pipe( pipe_slow );
18849 %}
18850
18851 // Replicate float scalar immediate to be vector by loading from const table.
18852 instruct ReplF_imm(vec dst, immF con) %{
18853 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18854 match(Set dst (Replicate con));
18855 format %{ "replicateF $dst,$con" %}
18856 ins_encode %{
18857 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18858 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18859 int vlen = Matcher::vector_length_in_bytes(this);
18860 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18861 %}
18862 ins_pipe( pipe_slow );
18863 %}
18864
18865 instruct ReplF_zero(vec dst, immF0 zero) %{
18866 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18867 match(Set dst (Replicate zero));
18868 format %{ "replicateF $dst,$zero" %}
18869 ins_encode %{
18870 int vlen_enc = vector_length_encoding(this);
18871 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18872 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18873 } else {
18874 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18875 }
18876 %}
18877 ins_pipe( fpu_reg_reg );
18878 %}
18879
18880 // ====================ReplicateD=======================================
18881
// Replicate double (8 byte) scalar to be vector
18883 instruct vReplD_reg(vec dst, vlRegD src) %{
18884 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18885 match(Set dst (Replicate src));
18886 format %{ "replicateD $dst,$src" %}
18887 ins_encode %{
18888 uint vlen = Matcher::vector_length(this);
18889 int vlen_enc = vector_length_encoding(this);
18890 if (vlen <= 2) {
18891 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18892 } else if (VM_Version::supports_avx2()) {
18893 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18894 } else {
18895 assert(vlen == 4, "sanity");
18896 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18897 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18898 }
18899 %}
18900 ins_pipe( pipe_slow );
18901 %}
18902
18903 instruct ReplD_reg(vec dst, vlRegD src) %{
18904 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18905 match(Set dst (Replicate src));
18906 format %{ "replicateD $dst,$src" %}
18907 ins_encode %{
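    // 0x44 = 0b01'00'01'00: select dwords {0,1,0,1}, duplicating the low
    // 64-bit element into both halves of the register.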
18908 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18909 %}
18910 ins_pipe( pipe_slow );
18911 %}
18912
18913 instruct ReplD_mem(vec dst, memory mem) %{
18914 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18915 match(Set dst (Replicate (LoadD mem)));
18916 format %{ "replicateD $dst,$mem" %}
18917 ins_encode %{
18918 if (Matcher::vector_length(this) >= 4) {
18919 int vlen_enc = vector_length_encoding(this);
18920 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18921 } else {
18922 __ movddup($dst$$XMMRegister, $mem$$Address);
18923 }
18924 %}
18925 ins_pipe( pipe_slow );
18926 %}
18927
18928 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18929 instruct ReplD_imm(vec dst, immD con) %{
18930 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18931 match(Set dst (Replicate con));
18932 format %{ "replicateD $dst,$con" %}
18933 ins_encode %{
18934 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18935 int vlen = Matcher::vector_length_in_bytes(this);
18936 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18937 %}
18938 ins_pipe( pipe_slow );
18939 %}
18940
18941 instruct ReplD_zero(vec dst, immD0 zero) %{
18942 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18943 match(Set dst (Replicate zero));
18944 format %{ "replicateD $dst,$zero" %}
18945 ins_encode %{
18946 int vlen_enc = vector_length_encoding(this);
18947 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18948 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18949 } else {
18950 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18951 }
18952 %}
18953 ins_pipe( fpu_reg_reg );
18954 %}
18955
18956 // ====================VECTOR INSERT=======================================
18957
18958 instruct insert(vec dst, rRegI val, immU8 idx) %{
18959 predicate(Matcher::vector_length_in_bytes(n) < 32);
18960 match(Set dst (VectorInsert (Binary dst val) idx));
18961 format %{ "vector_insert $dst,$val,$idx" %}
18962 ins_encode %{
18963 assert(UseSSE >= 4, "required");
18964 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18965
18966 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18967
18968 assert(is_integral_type(elem_bt), "");
18969 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18970
18971 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18972 %}
18973 ins_pipe( pipe_slow );
18974 %}
18975
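// The 256-bit and 512-bit insert rules below split the element index into a
// 128-bit lane index (y_idx) and a slot within that lane (x_idx). For
// example, for T_INT in a 256-bit vector, elem_per_lane = 4 and log2epr = 2,
// so idx = 5 gives x_idx = 1, y_idx = 1: extract lane 1, insert the scalar
// into its slot 1, then write the lane back.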
18976 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18977 predicate(Matcher::vector_length_in_bytes(n) == 32);
18978 match(Set dst (VectorInsert (Binary src val) idx));
18979 effect(TEMP vtmp);
18980 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18981 ins_encode %{
18982 int vlen_enc = Assembler::AVX_256bit;
18983 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18984 int elem_per_lane = 16/type2aelembytes(elem_bt);
18985 int log2epr = log2(elem_per_lane);
18986
18987 assert(is_integral_type(elem_bt), "sanity");
18988 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18989
18990 uint x_idx = $idx$$constant & right_n_bits(log2epr);
18991 uint y_idx = ($idx$$constant >> log2epr) & 1;
18992 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18993 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18994 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18995 %}
18996 ins_pipe( pipe_slow );
18997 %}
18998
18999 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19000 predicate(Matcher::vector_length_in_bytes(n) == 64);
19001 match(Set dst (VectorInsert (Binary src val) idx));
19002 effect(TEMP vtmp);
19003 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19004 ins_encode %{
19005 assert(UseAVX > 2, "sanity");
19006
19007 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19008 int elem_per_lane = 16/type2aelembytes(elem_bt);
19009 int log2epr = log2(elem_per_lane);
19010
19011 assert(is_integral_type(elem_bt), "");
19012 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19013
19014 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19015 uint y_idx = ($idx$$constant >> log2epr) & 3;
19016 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19017 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19018 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19019 %}
19020 ins_pipe( pipe_slow );
19021 %}
19022
19023 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19024 predicate(Matcher::vector_length(n) == 2);
19025 match(Set dst (VectorInsert (Binary dst val) idx));
19026 format %{ "vector_insert $dst,$val,$idx" %}
19027 ins_encode %{
19028 assert(UseSSE >= 4, "required");
19029 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19030 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19031
19032 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19033 %}
19034 ins_pipe( pipe_slow );
19035 %}
19036
19037 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19038 predicate(Matcher::vector_length(n) == 4);
19039 match(Set dst (VectorInsert (Binary src val) idx));
19040 effect(TEMP vtmp);
19041 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19042 ins_encode %{
19043 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19044 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19045
19046 uint x_idx = $idx$$constant & right_n_bits(1);
19047 uint y_idx = ($idx$$constant >> 1) & 1;
19048 int vlen_enc = Assembler::AVX_256bit;
19049 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19050 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19051 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19052 %}
19053 ins_pipe( pipe_slow );
19054 %}
19055
19056 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19057 predicate(Matcher::vector_length(n) == 8);
19058 match(Set dst (VectorInsert (Binary src val) idx));
19059 effect(TEMP vtmp);
19060 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19061 ins_encode %{
19062 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19063 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19064
19065 uint x_idx = $idx$$constant & right_n_bits(1);
19066 uint y_idx = ($idx$$constant >> 1) & 3;
19067 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19068 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19069 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19070 %}
19071 ins_pipe( pipe_slow );
19072 %}
19073
19074 instruct insertF(vec dst, regF val, immU8 idx) %{
19075 predicate(Matcher::vector_length(n) < 8);
19076 match(Set dst (VectorInsert (Binary dst val) idx));
19077 format %{ "vector_insert $dst,$val,$idx" %}
19078 ins_encode %{
19079 assert(UseSSE >= 4, "sanity");
19080
19081 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19082 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19083
19084 uint x_idx = $idx$$constant & right_n_bits(2);
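    // insertps imm8 bits 5:4 select the destination slot; the source slot
    // (bits 7:6) and the zero mask (bits 3:0) remain zero.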
19085 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19086 %}
19087 ins_pipe( pipe_slow );
19088 %}
19089
19090 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19091 predicate(Matcher::vector_length(n) >= 8);
19092 match(Set dst (VectorInsert (Binary src val) idx));
19093 effect(TEMP vtmp);
19094 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19095 ins_encode %{
19096 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19097 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19098
19099 int vlen = Matcher::vector_length(this);
19100 uint x_idx = $idx$$constant & right_n_bits(2);
19101 if (vlen == 8) {
19102 uint y_idx = ($idx$$constant >> 2) & 1;
19103 int vlen_enc = Assembler::AVX_256bit;
19104 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19105 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19106 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19107 } else {
19108 assert(vlen == 16, "sanity");
19109 uint y_idx = ($idx$$constant >> 2) & 3;
19110 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19111 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19112 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19113 }
19114 %}
19115 ins_pipe( pipe_slow );
19116 %}
19117
19118 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19119 predicate(Matcher::vector_length(n) == 2);
19120 match(Set dst (VectorInsert (Binary dst val) idx));
19121 effect(TEMP tmp);
19122 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19123 ins_encode %{
19124 assert(UseSSE >= 4, "sanity");
19125 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19126 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19127
19128 __ movq($tmp$$Register, $val$$XMMRegister);
19129 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19130 %}
19131 ins_pipe( pipe_slow );
19132 %}
19133
19134 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19135 predicate(Matcher::vector_length(n) == 4);
19136 match(Set dst (VectorInsert (Binary src val) idx));
19137 effect(TEMP vtmp, TEMP tmp);
19138 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19139 ins_encode %{
19140 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19141 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19142
19143 uint x_idx = $idx$$constant & right_n_bits(1);
19144 uint y_idx = ($idx$$constant >> 1) & 1;
19145 int vlen_enc = Assembler::AVX_256bit;
19146 __ movq($tmp$$Register, $val$$XMMRegister);
19147 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19148 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19149 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19150 %}
19151 ins_pipe( pipe_slow );
19152 %}
19153
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19159 ins_encode %{
19160 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19161 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19162
19163 uint x_idx = $idx$$constant & right_n_bits(1);
19164 uint y_idx = ($idx$$constant >> 1) & 3;
19165 __ movq($tmp$$Register, $val$$XMMRegister);
19166 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19167 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19168 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19169 %}
19170 ins_pipe( pipe_slow );
19171 %}
19172
19173 // ====================REDUCTION ARITHMETIC=======================================
19174
19175 // =======================Int Reduction==========================================
19176
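// In the reduction rules below, src1 is the incoming scalar value and src2
// is the vector folded into it.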
19177 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19178 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19179 match(Set dst (AddReductionVI src1 src2));
19180 match(Set dst (MulReductionVI src1 src2));
19181 match(Set dst (AndReductionV src1 src2));
19182 match(Set dst ( OrReductionV src1 src2));
19183 match(Set dst (XorReductionV src1 src2));
19184 match(Set dst (MinReductionV src1 src2));
19185 match(Set dst (MaxReductionV src1 src2));
19186 effect(TEMP vtmp1, TEMP vtmp2);
19187 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19188 ins_encode %{
19189 int opcode = this->ideal_Opcode();
19190 int vlen = Matcher::vector_length(this, $src2);
19191 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19192 %}
19193 ins_pipe( pipe_slow );
19194 %}
19195
19196 // =======================Long Reduction==========================================
19197
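// Without AVX512DQ the long reduction has to stay within the legacy XMM/YMM
// register file (legVec operands); the AVX512DQ variant below may use the
// full EVEX register set.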
19198 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19199 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19200 match(Set dst (AddReductionVL src1 src2));
19201 match(Set dst (MulReductionVL src1 src2));
19202 match(Set dst (AndReductionV src1 src2));
19203 match(Set dst ( OrReductionV src1 src2));
19204 match(Set dst (XorReductionV src1 src2));
19205 match(Set dst (MinReductionV src1 src2));
19206 match(Set dst (MaxReductionV src1 src2));
19207 effect(TEMP vtmp1, TEMP vtmp2);
19208 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19209 ins_encode %{
19210 int opcode = this->ideal_Opcode();
19211 int vlen = Matcher::vector_length(this, $src2);
19212 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19213 %}
19214 ins_pipe( pipe_slow );
19215 %}
19216
19217 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19218 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19219 match(Set dst (AddReductionVL src1 src2));
19220 match(Set dst (MulReductionVL src1 src2));
19221 match(Set dst (AndReductionV src1 src2));
19222 match(Set dst ( OrReductionV src1 src2));
19223 match(Set dst (XorReductionV src1 src2));
19224 match(Set dst (MinReductionV src1 src2));
19225 match(Set dst (MaxReductionV src1 src2));
19226 effect(TEMP vtmp1, TEMP vtmp2);
19227 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19228 ins_encode %{
19229 int opcode = this->ideal_Opcode();
19230 int vlen = Matcher::vector_length(this, $src2);
19231 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19232 %}
19233 ins_pipe( pipe_slow );
19234 %}
19235
19236 // =======================Float Reduction==========================================
19237
19238 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19239 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19240 match(Set dst (AddReductionVF dst src));
19241 match(Set dst (MulReductionVF dst src));
19242 effect(TEMP dst, TEMP vtmp);
19243 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19244 ins_encode %{
19245 int opcode = this->ideal_Opcode();
19246 int vlen = Matcher::vector_length(this, $src);
19247 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19248 %}
19249 ins_pipe( pipe_slow );
19250 %}
19251
19252 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19253 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19254 match(Set dst (AddReductionVF dst src));
19255 match(Set dst (MulReductionVF dst src));
19256 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19257 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19258 ins_encode %{
19259 int opcode = this->ideal_Opcode();
19260 int vlen = Matcher::vector_length(this, $src);
19261 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19262 %}
19263 ins_pipe( pipe_slow );
19264 %}
19265
19266 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19267 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19268 match(Set dst (AddReductionVF dst src));
19269 match(Set dst (MulReductionVF dst src));
19270 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19271 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19272 ins_encode %{
19273 int opcode = this->ideal_Opcode();
19274 int vlen = Matcher::vector_length(this, $src);
19275 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19276 %}
19277 ins_pipe( pipe_slow );
19278 %}
19279
19281 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19282 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19283 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19284 // src1 contains reduction identity
19285 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19286 match(Set dst (AddReductionVF src1 src2));
19287 match(Set dst (MulReductionVF src1 src2));
19288 effect(TEMP dst);
19289 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19290 ins_encode %{
19291 int opcode = this->ideal_Opcode();
19292 int vlen = Matcher::vector_length(this, $src2);
19293 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19294 %}
19295 ins_pipe( pipe_slow );
19296 %}
19297
19298 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19299 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19300 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19301 // src1 contains reduction identity
19302 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19303 match(Set dst (AddReductionVF src1 src2));
19304 match(Set dst (MulReductionVF src1 src2));
19305 effect(TEMP dst, TEMP vtmp);
19306 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19307 ins_encode %{
19308 int opcode = this->ideal_Opcode();
19309 int vlen = Matcher::vector_length(this, $src2);
19310 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19311 %}
19312 ins_pipe( pipe_slow );
19313 %}
19314
19315 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19316 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19317 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19318 // src1 contains reduction identity
19319 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19320 match(Set dst (AddReductionVF src1 src2));
19321 match(Set dst (MulReductionVF src1 src2));
19322 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19323 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19324 ins_encode %{
19325 int opcode = this->ideal_Opcode();
19326 int vlen = Matcher::vector_length(this, $src2);
19327 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19328 %}
19329 ins_pipe( pipe_slow );
19330 %}
19331
19332 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19333 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19334 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19335 // src1 contains reduction identity
19336 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19337 match(Set dst (AddReductionVF src1 src2));
19338 match(Set dst (MulReductionVF src1 src2));
19339 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19340 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19341 ins_encode %{
19342 int opcode = this->ideal_Opcode();
19343 int vlen = Matcher::vector_length(this, $src2);
19344 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19345 %}
19346 ins_pipe( pipe_slow );
19347 %}
19348
19349 // =======================Double Reduction==========================================
19350
19351 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19352 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19353 match(Set dst (AddReductionVD dst src));
19354 match(Set dst (MulReductionVD dst src));
19355 effect(TEMP dst, TEMP vtmp);
19356 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19357 ins_encode %{
19358 int opcode = this->ideal_Opcode();
19359 int vlen = Matcher::vector_length(this, $src);
19360 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19361 %}
19362 ins_pipe( pipe_slow );
19363 %}
19364
19365 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19366 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19367 match(Set dst (AddReductionVD dst src));
19368 match(Set dst (MulReductionVD dst src));
19369 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19370 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19371 ins_encode %{
19372 int opcode = this->ideal_Opcode();
19373 int vlen = Matcher::vector_length(this, $src);
19374 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19375 %}
19376 ins_pipe( pipe_slow );
19377 %}
19378
19379 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19380 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19381 match(Set dst (AddReductionVD dst src));
19382 match(Set dst (MulReductionVD dst src));
19383 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19384 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19385 ins_encode %{
19386 int opcode = this->ideal_Opcode();
19387 int vlen = Matcher::vector_length(this, $src);
19388 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19389 %}
19390 ins_pipe( pipe_slow );
19391 %}
19392
19393 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19394 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19395 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19396 // src1 contains reduction identity
19397 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19398 match(Set dst (AddReductionVD src1 src2));
19399 match(Set dst (MulReductionVD src1 src2));
19400 effect(TEMP dst);
19401 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19402 ins_encode %{
19403 int opcode = this->ideal_Opcode();
19404 int vlen = Matcher::vector_length(this, $src2);
19405 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19406 %}
19407 ins_pipe( pipe_slow );
19408 %}
19409
19410 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19411 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19412 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19413 // src1 contains reduction identity
19414 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19415 match(Set dst (AddReductionVD src1 src2));
19416 match(Set dst (MulReductionVD src1 src2));
19417 effect(TEMP dst, TEMP vtmp);
19418 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19419 ins_encode %{
19420 int opcode = this->ideal_Opcode();
19421 int vlen = Matcher::vector_length(this, $src2);
19422 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19423 %}
19424 ins_pipe( pipe_slow );
19425 %}
19426
19427 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19428 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19429 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19430 // src1 contains reduction identity
19431 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19432 match(Set dst (AddReductionVD src1 src2));
19433 match(Set dst (MulReductionVD src1 src2));
19434 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19435 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19436 ins_encode %{
19437 int opcode = this->ideal_Opcode();
19438 int vlen = Matcher::vector_length(this, $src2);
19439 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19440 %}
19441 ins_pipe( pipe_slow );
19442 %}
19443
19444 // =======================Byte Reduction==========================================
19445
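// Without AVX512BW, byte vector instructions are restricted to the legacy
// XMM/YMM register file (legVec operands); the AVX512BW variant below may
// use the full EVEX register set.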
19446 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19447 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19448 match(Set dst (AddReductionVI src1 src2));
19449 match(Set dst (AndReductionV src1 src2));
19450 match(Set dst ( OrReductionV src1 src2));
19451 match(Set dst (XorReductionV src1 src2));
19452 match(Set dst (MinReductionV src1 src2));
19453 match(Set dst (MaxReductionV src1 src2));
19454 effect(TEMP vtmp1, TEMP vtmp2);
19455 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19456 ins_encode %{
19457 int opcode = this->ideal_Opcode();
19458 int vlen = Matcher::vector_length(this, $src2);
19459 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19460 %}
19461 ins_pipe( pipe_slow );
19462 %}
19463
19464 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19465 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19466 match(Set dst (AddReductionVI src1 src2));
19467 match(Set dst (AndReductionV src1 src2));
19468 match(Set dst ( OrReductionV src1 src2));
19469 match(Set dst (XorReductionV src1 src2));
19470 match(Set dst (MinReductionV src1 src2));
19471 match(Set dst (MaxReductionV src1 src2));
19472 effect(TEMP vtmp1, TEMP vtmp2);
19473 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19474 ins_encode %{
19475 int opcode = this->ideal_Opcode();
19476 int vlen = Matcher::vector_length(this, $src2);
19477 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19478 %}
19479 ins_pipe( pipe_slow );
19480 %}
19481
19482 // =======================Short Reduction==========================================
19483
19484 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19485 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19486 match(Set dst (AddReductionVI src1 src2));
19487 match(Set dst (MulReductionVI src1 src2));
19488 match(Set dst (AndReductionV src1 src2));
19489 match(Set dst ( OrReductionV src1 src2));
19490 match(Set dst (XorReductionV src1 src2));
19491 match(Set dst (MinReductionV src1 src2));
19492 match(Set dst (MaxReductionV src1 src2));
19493 effect(TEMP vtmp1, TEMP vtmp2);
19494 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19495 ins_encode %{
19496 int opcode = this->ideal_Opcode();
19497 int vlen = Matcher::vector_length(this, $src2);
19498 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19499 %}
19500 ins_pipe( pipe_slow );
19501 %}
19502
// =======================Byte Mul Reduction=====================================
19504
19505 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19506 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19507 Matcher::vector_length(n->in(2)) <= 32); // src2
19508 match(Set dst (MulReductionVI src1 src2));
19509 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19510 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19511 ins_encode %{
19512 int opcode = this->ideal_Opcode();
19513 int vlen = Matcher::vector_length(this, $src2);
19514 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19515 %}
19516 ins_pipe( pipe_slow );
19517 %}
19518
19519 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19520 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19521 Matcher::vector_length(n->in(2)) == 64); // src2
19522 match(Set dst (MulReductionVI src1 src2));
19523 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19524 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19525 ins_encode %{
19526 int opcode = this->ideal_Opcode();
19527 int vlen = Matcher::vector_length(this, $src2);
19528 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19529 %}
19530 ins_pipe( pipe_slow );
19531 %}
19532
//--------------------Min/Max Float Reduction --------------------
19535 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19536 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19537 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19538 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19539 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19540 Matcher::vector_length(n->in(2)) == 2);
19541 match(Set dst (MinReductionV src1 src2));
19542 match(Set dst (MaxReductionV src1 src2));
19543 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19544 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19545 ins_encode %{
19546 assert(UseAVX > 0, "sanity");
19547
19548 int opcode = this->ideal_Opcode();
19549 int vlen = Matcher::vector_length(this, $src2);
19550 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19551 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19552 %}
19553 ins_pipe( pipe_slow );
19554 %}
19555
19556 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19557 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19558 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19559 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19560 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19561 Matcher::vector_length(n->in(2)) >= 4);
19562 match(Set dst (MinReductionV src1 src2));
19563 match(Set dst (MaxReductionV src1 src2));
19564 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19565 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19566 ins_encode %{
19567 assert(UseAVX > 0, "sanity");
19568
19569 int opcode = this->ideal_Opcode();
19570 int vlen = Matcher::vector_length(this, $src2);
19571 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19572 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19573 %}
19574 ins_pipe( pipe_slow );
19575 %}
19576
19577 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19578 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19579 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19580 Matcher::vector_length(n->in(2)) == 2);
19581 match(Set dst (MinReductionV dst src));
19582 match(Set dst (MaxReductionV dst src));
19583 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19584 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19585 ins_encode %{
19586 assert(UseAVX > 0, "sanity");
19587
19588 int opcode = this->ideal_Opcode();
19589 int vlen = Matcher::vector_length(this, $src);
19590 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19591 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19592 %}
19593 ins_pipe( pipe_slow );
19594 %}
19595
19597 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19598 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19599 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19600 Matcher::vector_length(n->in(2)) >= 4);
19601 match(Set dst (MinReductionV dst src));
19602 match(Set dst (MaxReductionV dst src));
19603 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19604 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19605 ins_encode %{
19606 assert(UseAVX > 0, "sanity");
19607
19608 int opcode = this->ideal_Opcode();
19609 int vlen = Matcher::vector_length(this, $src);
19610 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19611 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19612 %}
19613 ins_pipe( pipe_slow );
19614 %}
19615
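// The AVX10.2 rules below need fewer temporaries than the legacy min/max
// sequences and do not clobber the flags; unused temp positions are passed
// to the macro assembler as xnoreg.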
19616 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19617 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19618 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19619 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19620 Matcher::vector_length(n->in(2)) == 2);
19621 match(Set dst (MinReductionV src1 src2));
19622 match(Set dst (MaxReductionV src1 src2));
19623 effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19625 ins_encode %{
19626 int opcode = this->ideal_Opcode();
19627 int vlen = Matcher::vector_length(this, $src2);
19628 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19629 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19630 %}
19631 ins_pipe( pipe_slow );
19632 %}
19633
19634 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19635 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19636 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19637 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19638 Matcher::vector_length(n->in(2)) >= 4);
19639 match(Set dst (MinReductionV src1 src2));
19640 match(Set dst (MaxReductionV src1 src2));
19641 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19643 ins_encode %{
19644 int opcode = this->ideal_Opcode();
19645 int vlen = Matcher::vector_length(this, $src2);
19646 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19647 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19648 %}
19649 ins_pipe( pipe_slow );
19650 %}
19651
19652 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19653 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19654 Matcher::vector_length(n->in(2)) == 2);
19655 match(Set dst (MinReductionV dst src));
19656 match(Set dst (MaxReductionV dst src));
19657 effect(TEMP dst, TEMP xtmp1);
19658 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19659 ins_encode %{
19660 int opcode = this->ideal_Opcode();
19661 int vlen = Matcher::vector_length(this, $src);
19662 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19663 $xtmp1$$XMMRegister);
19664 %}
19665 ins_pipe( pipe_slow );
19666 %}
19667
19668 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19669 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19670 Matcher::vector_length(n->in(2)) >= 4);
19671 match(Set dst (MinReductionV dst src));
19672 match(Set dst (MaxReductionV dst src));
19673 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19675 ins_encode %{
19676 int opcode = this->ideal_Opcode();
19677 int vlen = Matcher::vector_length(this, $src);
19678 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19679 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19680 %}
19681 ins_pipe( pipe_slow );
19682 %}
19683
//--------------------Min/Max Double Reduction --------------------
19685 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19686 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19687 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19688 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19689 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19690 Matcher::vector_length(n->in(2)) == 2);
19691 match(Set dst (MinReductionV src1 src2));
19692 match(Set dst (MaxReductionV src1 src2));
19693 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19694 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19695 ins_encode %{
19696 assert(UseAVX > 0, "sanity");
19697
19698 int opcode = this->ideal_Opcode();
19699 int vlen = Matcher::vector_length(this, $src2);
19700 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19701 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19702 %}
19703 ins_pipe( pipe_slow );
19704 %}
19705
19706 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19707 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19708 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19709 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19710 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19711 Matcher::vector_length(n->in(2)) >= 4);
19712 match(Set dst (MinReductionV src1 src2));
19713 match(Set dst (MaxReductionV src1 src2));
19714 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19715 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19716 ins_encode %{
19717 assert(UseAVX > 0, "sanity");
19718
19719 int opcode = this->ideal_Opcode();
19720 int vlen = Matcher::vector_length(this, $src2);
19721 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19722 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19723 %}
19724 ins_pipe( pipe_slow );
19725 %}
19726
19728 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19729 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19730 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19731 Matcher::vector_length(n->in(2)) == 2);
19732 match(Set dst (MinReductionV dst src));
19733 match(Set dst (MaxReductionV dst src));
19734 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19735 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19736 ins_encode %{
19737 assert(UseAVX > 0, "sanity");
19738
19739 int opcode = this->ideal_Opcode();
19740 int vlen = Matcher::vector_length(this, $src);
19741 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19742 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19743 %}
19744 ins_pipe( pipe_slow );
19745 %}
19746
19747 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19748 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19749 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19750 Matcher::vector_length(n->in(2)) >= 4);
19751 match(Set dst (MinReductionV dst src));
19752 match(Set dst (MaxReductionV dst src));
19753 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19754 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19755 ins_encode %{
19756 assert(UseAVX > 0, "sanity");
19757
19758 int opcode = this->ideal_Opcode();
19759 int vlen = Matcher::vector_length(this, $src);
19760 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19761 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19762 %}
19763 ins_pipe( pipe_slow );
19764 %}
19765
19766 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19767 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19768 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19769 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19770 Matcher::vector_length(n->in(2)) == 2);
19771 match(Set dst (MinReductionV src1 src2));
19772 match(Set dst (MaxReductionV src1 src2));
19773 effect(TEMP dst, TEMP xtmp1);
19774 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19775 ins_encode %{
19776 int opcode = this->ideal_Opcode();
19777 int vlen = Matcher::vector_length(this, $src2);
19778 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19779 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19780 %}
19781 ins_pipe( pipe_slow );
19782 %}
19783
19784 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19785 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19786 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19787 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19788 Matcher::vector_length(n->in(2)) >= 4);
19789 match(Set dst (MinReductionV src1 src2));
19790 match(Set dst (MaxReductionV src1 src2));
19791 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19792 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19793 ins_encode %{
19794 int opcode = this->ideal_Opcode();
19795 int vlen = Matcher::vector_length(this, $src2);
19796 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19797 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19798 %}
19799 ins_pipe( pipe_slow );
19800 %}
19801
19803 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19804 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19805 Matcher::vector_length(n->in(2)) == 2);
19806 match(Set dst (MinReductionV dst src));
19807 match(Set dst (MaxReductionV dst src));
19808 effect(TEMP dst, TEMP xtmp1);
19809 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19810 ins_encode %{
19811 int opcode = this->ideal_Opcode();
19812 int vlen = Matcher::vector_length(this, $src);
19813 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19814 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19815 %}
19816 ins_pipe( pipe_slow );
19817 %}
19818
19819 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19820 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19821 Matcher::vector_length(n->in(2)) >= 4);
19822 match(Set dst (MinReductionV dst src));
19823 match(Set dst (MaxReductionV dst src));
19824 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19825 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19826 ins_encode %{
19827 int opcode = this->ideal_Opcode();
19828 int vlen = Matcher::vector_length(this, $src);
19829 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19830 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19831 %}
19832 ins_pipe( pipe_slow );
19833 %}
19834
19835 // ====================VECTOR ARITHMETIC=======================================
19836
19837 // --------------------------------- ADD --------------------------------------
19838
19839 // Bytes vector add
19840 instruct vaddB(vec dst, vec src) %{
19841 predicate(UseAVX == 0);
19842 match(Set dst (AddVB dst src));
19843 format %{ "paddb $dst,$src\t! add packedB" %}
19844 ins_encode %{
19845 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19846 %}
19847 ins_pipe( pipe_slow );
19848 %}
19849
19850 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19851 predicate(UseAVX > 0);
19852 match(Set dst (AddVB src1 src2));
19853 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19854 ins_encode %{
19855 int vlen_enc = vector_length_encoding(this);
19856 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19857 %}
19858 ins_pipe( pipe_slow );
19859 %}
19860
19861 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19862 predicate((UseAVX > 0) &&
19863 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19864 match(Set dst (AddVB src (LoadVector mem)));
19865 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19866 ins_encode %{
19867 int vlen_enc = vector_length_encoding(this);
19868 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19869 %}
19870 ins_pipe( pipe_slow );
19871 %}
19872
19873 // Shorts/Chars vector add
19874 instruct vaddS(vec dst, vec src) %{
19875 predicate(UseAVX == 0);
19876 match(Set dst (AddVS dst src));
19877 format %{ "paddw $dst,$src\t! add packedS" %}
19878 ins_encode %{
19879 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19880 %}
19881 ins_pipe( pipe_slow );
19882 %}
19883
19884 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19885 predicate(UseAVX > 0);
19886 match(Set dst (AddVS src1 src2));
19887 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19888 ins_encode %{
19889 int vlen_enc = vector_length_encoding(this);
19890 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19891 %}
19892 ins_pipe( pipe_slow );
19893 %}
19894
19895 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19896 predicate((UseAVX > 0) &&
19897 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19898 match(Set dst (AddVS src (LoadVector mem)));
19899 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19900 ins_encode %{
19901 int vlen_enc = vector_length_encoding(this);
19902 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19903 %}
19904 ins_pipe( pipe_slow );
19905 %}
19906
19907 // Integers vector add
19908 instruct vaddI(vec dst, vec src) %{
19909 predicate(UseAVX == 0);
19910 match(Set dst (AddVI dst src));
19911 format %{ "paddd $dst,$src\t! add packedI" %}
19912 ins_encode %{
19913 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19914 %}
19915 ins_pipe( pipe_slow );
19916 %}
19917
19918 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19919 predicate(UseAVX > 0);
19920 match(Set dst (AddVI src1 src2));
19921 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19922 ins_encode %{
19923 int vlen_enc = vector_length_encoding(this);
19924 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19925 %}
19926 ins_pipe( pipe_slow );
19927 %}
19928
19929
19930 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19931 predicate((UseAVX > 0) &&
19932 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19933 match(Set dst (AddVI src (LoadVector mem)));
19934 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19935 ins_encode %{
19936 int vlen_enc = vector_length_encoding(this);
19937 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19938 %}
19939 ins_pipe( pipe_slow );
19940 %}
19941
19942 // Longs vector add
19943 instruct vaddL(vec dst, vec src) %{
19944 predicate(UseAVX == 0);
19945 match(Set dst (AddVL dst src));
19946 format %{ "paddq $dst,$src\t! add packedL" %}
19947 ins_encode %{
19948 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19949 %}
19950 ins_pipe( pipe_slow );
19951 %}
19952
19953 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19954 predicate(UseAVX > 0);
19955 match(Set dst (AddVL src1 src2));
19956 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
19957 ins_encode %{
19958 int vlen_enc = vector_length_encoding(this);
19959 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19960 %}
19961 ins_pipe( pipe_slow );
19962 %}
19963
19964 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19965 predicate((UseAVX > 0) &&
19966 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19967 match(Set dst (AddVL src (LoadVector mem)));
19968 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
19969 ins_encode %{
19970 int vlen_enc = vector_length_encoding(this);
19971 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19972 %}
19973 ins_pipe( pipe_slow );
19974 %}
19975
19976 // Floats vector add
19977 instruct vaddF(vec dst, vec src) %{
19978 predicate(UseAVX == 0);
19979 match(Set dst (AddVF dst src));
19980 format %{ "addps $dst,$src\t! add packedF" %}
19981 ins_encode %{
19982 __ addps($dst$$XMMRegister, $src$$XMMRegister);
19983 %}
19984 ins_pipe( pipe_slow );
19985 %}
19986
19987 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19988 predicate(UseAVX > 0);
19989 match(Set dst (AddVF src1 src2));
19990 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
19991 ins_encode %{
19992 int vlen_enc = vector_length_encoding(this);
19993 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19994 %}
19995 ins_pipe( pipe_slow );
19996 %}
19997
19998 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19999 predicate((UseAVX > 0) &&
20000 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20001 match(Set dst (AddVF src (LoadVector mem)));
20002 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20003 ins_encode %{
20004 int vlen_enc = vector_length_encoding(this);
20005 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20006 %}
20007 ins_pipe( pipe_slow );
20008 %}
20009
20010 // Doubles vector add
20011 instruct vaddD(vec dst, vec src) %{
20012 predicate(UseAVX == 0);
20013 match(Set dst (AddVD dst src));
20014 format %{ "addpd $dst,$src\t! add packedD" %}
20015 ins_encode %{
20016 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20017 %}
20018 ins_pipe( pipe_slow );
20019 %}
20020
20021 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20022 predicate(UseAVX > 0);
20023 match(Set dst (AddVD src1 src2));
20024 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20025 ins_encode %{
20026 int vlen_enc = vector_length_encoding(this);
20027 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20028 %}
20029 ins_pipe( pipe_slow );
20030 %}
20031
20032 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20033 predicate((UseAVX > 0) &&
20034 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20035 match(Set dst (AddVD src (LoadVector mem)));
20036 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20037 ins_encode %{
20038 int vlen_enc = vector_length_encoding(this);
20039 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20040 %}
20041 ins_pipe( pipe_slow );
20042 %}
20043
20044 // --------------------------------- SUB --------------------------------------
20045
20046 // Bytes vector sub
20047 instruct vsubB(vec dst, vec src) %{
20048 predicate(UseAVX == 0);
20049 match(Set dst (SubVB dst src));
20050 format %{ "psubb $dst,$src\t! sub packedB" %}
20051 ins_encode %{
20052 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20053 %}
20054 ins_pipe( pipe_slow );
20055 %}
20056
20057 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20058 predicate(UseAVX > 0);
20059 match(Set dst (SubVB src1 src2));
20060 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20061 ins_encode %{
20062 int vlen_enc = vector_length_encoding(this);
20063 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20064 %}
20065 ins_pipe( pipe_slow );
20066 %}
20067
20068 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20069 predicate((UseAVX > 0) &&
20070 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20071 match(Set dst (SubVB src (LoadVector mem)));
20072 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20073 ins_encode %{
20074 int vlen_enc = vector_length_encoding(this);
20075 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20076 %}
20077 ins_pipe( pipe_slow );
20078 %}
20079
20080 // Shorts/Chars vector sub
20081 instruct vsubS(vec dst, vec src) %{
20082 predicate(UseAVX == 0);
20083 match(Set dst (SubVS dst src));
20084 format %{ "psubw $dst,$src\t! sub packedS" %}
20085 ins_encode %{
20086 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20087 %}
20088 ins_pipe( pipe_slow );
20089 %}
20090
20091
20092 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20093 predicate(UseAVX > 0);
20094 match(Set dst (SubVS src1 src2));
20095 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20096 ins_encode %{
20097 int vlen_enc = vector_length_encoding(this);
20098 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20099 %}
20100 ins_pipe( pipe_slow );
20101 %}
20102
20103 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20104 predicate((UseAVX > 0) &&
20105 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20106 match(Set dst (SubVS src (LoadVector mem)));
20107 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20108 ins_encode %{
20109 int vlen_enc = vector_length_encoding(this);
20110 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20111 %}
20112 ins_pipe( pipe_slow );
20113 %}
20114
20115 // Integers vector sub
20116 instruct vsubI(vec dst, vec src) %{
20117 predicate(UseAVX == 0);
20118 match(Set dst (SubVI dst src));
20119 format %{ "psubd $dst,$src\t! sub packedI" %}
20120 ins_encode %{
20121 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20122 %}
20123 ins_pipe( pipe_slow );
20124 %}
20125
20126 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20127 predicate(UseAVX > 0);
20128 match(Set dst (SubVI src1 src2));
20129 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20130 ins_encode %{
20131 int vlen_enc = vector_length_encoding(this);
20132 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20133 %}
20134 ins_pipe( pipe_slow );
20135 %}
20136
20137 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20138 predicate((UseAVX > 0) &&
20139 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20140 match(Set dst (SubVI src (LoadVector mem)));
20141 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20142 ins_encode %{
20143 int vlen_enc = vector_length_encoding(this);
20144 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20145 %}
20146 ins_pipe( pipe_slow );
20147 %}
20148
20149 // Longs vector sub
20150 instruct vsubL(vec dst, vec src) %{
20151 predicate(UseAVX == 0);
20152 match(Set dst (SubVL dst src));
20153 format %{ "psubq $dst,$src\t! sub packedL" %}
20154 ins_encode %{
20155 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20156 %}
20157 ins_pipe( pipe_slow );
20158 %}
20159
20160 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20161 predicate(UseAVX > 0);
20162 match(Set dst (SubVL src1 src2));
20163 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20164 ins_encode %{
20165 int vlen_enc = vector_length_encoding(this);
20166 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20167 %}
20168 ins_pipe( pipe_slow );
20169 %}
20170
20171
20172 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20173 predicate((UseAVX > 0) &&
20174 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20175 match(Set dst (SubVL src (LoadVector mem)));
20176 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20177 ins_encode %{
20178 int vlen_enc = vector_length_encoding(this);
20179 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20180 %}
20181 ins_pipe( pipe_slow );
20182 %}
20183
20184 // Floats vector sub
20185 instruct vsubF(vec dst, vec src) %{
20186 predicate(UseAVX == 0);
20187 match(Set dst (SubVF dst src));
20188 format %{ "subps $dst,$src\t! sub packedF" %}
20189 ins_encode %{
20190 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20191 %}
20192 ins_pipe( pipe_slow );
20193 %}
20194
20195 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20196 predicate(UseAVX > 0);
20197 match(Set dst (SubVF src1 src2));
20198 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20199 ins_encode %{
20200 int vlen_enc = vector_length_encoding(this);
20201 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20202 %}
20203 ins_pipe( pipe_slow );
20204 %}
20205
20206 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20207 predicate((UseAVX > 0) &&
20208 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20209 match(Set dst (SubVF src (LoadVector mem)));
20210 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20211 ins_encode %{
20212 int vlen_enc = vector_length_encoding(this);
20213 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20214 %}
20215 ins_pipe( pipe_slow );
20216 %}
20217
20218 // Doubles vector sub
20219 instruct vsubD(vec dst, vec src) %{
20220 predicate(UseAVX == 0);
20221 match(Set dst (SubVD dst src));
20222 format %{ "subpd $dst,$src\t! sub packedD" %}
20223 ins_encode %{
20224 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20225 %}
20226 ins_pipe( pipe_slow );
20227 %}
20228
20229 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20230 predicate(UseAVX > 0);
20231 match(Set dst (SubVD src1 src2));
20232 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20233 ins_encode %{
20234 int vlen_enc = vector_length_encoding(this);
20235 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20236 %}
20237 ins_pipe( pipe_slow );
20238 %}
20239
20240 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20241 predicate((UseAVX > 0) &&
20242 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20243 match(Set dst (SubVD src (LoadVector mem)));
20244 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20245 ins_encode %{
20246 int vlen_enc = vector_length_encoding(this);
20247 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20248 %}
20249 ins_pipe( pipe_slow );
20250 %}
20251
20252 // --------------------------------- MUL --------------------------------------
20253
20254 // Byte vector mul
20255 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20256 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20257 match(Set dst (MulVB src1 src2));
20258 effect(TEMP dst, TEMP xtmp);
20259 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20260 ins_encode %{
20261 assert(UseSSE > 3, "required");
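    // SSE has no byte-wise multiply; sign-extend both operands to words,
    // multiply as words, then clear the high byte of each word and pack
    // the low bytes back.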
20262 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20263 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20264 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20265 __ psllw($dst$$XMMRegister, 8);
20266 __ psrlw($dst$$XMMRegister, 8);
20267 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20268 %}
20269 ins_pipe( pipe_slow );
20270 %}
20271
20272 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20273 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20274 match(Set dst (MulVB src1 src2));
20275 effect(TEMP dst, TEMP xtmp);
20276 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20277 ins_encode %{
20278 assert(UseSSE > 3, "required");
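    // SSE has no byte-wise multiply; form the odd- and even-indexed byte
    // products separately as 16-bit multiplies and merge their low bytes.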
20279 // Odd-index elements
20280 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20281 __ psrlw($dst$$XMMRegister, 8);
20282 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20283 __ psrlw($xtmp$$XMMRegister, 8);
20284 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20285 __ psllw($dst$$XMMRegister, 8);
20286 // Even-index elements
20287 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20288 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20289 __ psllw($xtmp$$XMMRegister, 8);
20290 __ psrlw($xtmp$$XMMRegister, 8);
20291 // Combine
20292 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20293 %}
20294 ins_pipe( pipe_slow );
20295 %}
20296
20297 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20298 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20299 match(Set dst (MulVB src1 src2));
20300 effect(TEMP xtmp1, TEMP xtmp2);
20301 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20302 ins_encode %{
20303 int vlen_enc = vector_length_encoding(this);
20304 // Odd-index elements
20305 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20306 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20307 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20308 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20309 // Even-index elements
20310 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20311 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20312 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20313 // Combine
20314 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20315 %}
20316 ins_pipe( pipe_slow );
20317 %}
20318
20319 // Shorts/Chars vector mul
20320 instruct vmulS(vec dst, vec src) %{
20321 predicate(UseAVX == 0);
20322 match(Set dst (MulVS dst src));
20323 format %{ "pmullw $dst,$src\t! mul packedS" %}
20324 ins_encode %{
20325 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20326 %}
20327 ins_pipe( pipe_slow );
20328 %}
20329
20330 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20331 predicate(UseAVX > 0);
20332 match(Set dst (MulVS src1 src2));
20333 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20334 ins_encode %{
20335 int vlen_enc = vector_length_encoding(this);
20336 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20337 %}
20338 ins_pipe( pipe_slow );
20339 %}
20340
20341 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20342 predicate((UseAVX > 0) &&
20343 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20344 match(Set dst (MulVS src (LoadVector mem)));
20345 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20346 ins_encode %{
20347 int vlen_enc = vector_length_encoding(this);
20348 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20349 %}
20350 ins_pipe( pipe_slow );
20351 %}
20352
20353 // Integers vector mul
20354 instruct vmulI(vec dst, vec src) %{
20355 predicate(UseAVX == 0);
20356 match(Set dst (MulVI dst src));
20357 format %{ "pmulld $dst,$src\t! mul packedI" %}
20358 ins_encode %{
20359 assert(UseSSE > 3, "required");
20360 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20361 %}
20362 ins_pipe( pipe_slow );
20363 %}
20364
20365 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20366 predicate(UseAVX > 0);
20367 match(Set dst (MulVI src1 src2));
20368 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20369 ins_encode %{
20370 int vlen_enc = vector_length_encoding(this);
20371 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20372 %}
20373 ins_pipe( pipe_slow );
20374 %}
20375
20376 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20377 predicate((UseAVX > 0) &&
20378 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20379 match(Set dst (MulVI src (LoadVector mem)));
20380 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20381 ins_encode %{
20382 int vlen_enc = vector_length_encoding(this);
20383 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20384 %}
20385 ins_pipe( pipe_slow );
20386 %}
20387
20388 // Longs vector mul
20389 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20390 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20391 VM_Version::supports_avx512dq()) ||
20392 VM_Version::supports_avx512vldq());
20393 match(Set dst (MulVL src1 src2));
20394 ins_cost(500);
20395 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20396 ins_encode %{
20397 assert(UseAVX > 2, "required");
20398 int vlen_enc = vector_length_encoding(this);
20399 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20400 %}
20401 ins_pipe( pipe_slow );
20402 %}
20403
20404 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20405 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20406 VM_Version::supports_avx512dq()) ||
20407 (Matcher::vector_length_in_bytes(n) > 8 &&
20408 VM_Version::supports_avx512vldq()));
20409 match(Set dst (MulVL src (LoadVector mem)));
20410 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20411 ins_cost(500);
20412 ins_encode %{
20413 assert(UseAVX > 2, "required");
20414 int vlen_enc = vector_length_encoding(this);
20415 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20416 %}
20417 ins_pipe( pipe_slow );
20418 %}
20419
20420 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20421 predicate(UseAVX == 0);
20422 match(Set dst (MulVL src1 src2));
20423 ins_cost(500);
20424 effect(TEMP dst, TEMP xtmp);
20425 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20426 ins_encode %{
20427 assert(VM_Version::supports_sse4_1(), "required");
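    // 64x64->64 multiply from 32-bit halves: with a = a_hi:a_lo and
    // b = b_hi:b_lo, the low 64 bits of a*b are
    //   a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32).
    // pmulld forms the two cross products below, pmuludq the full lo*lo product.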
    // Get the lo-hi cross products; only their lower 32 bits are of concern
20429 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20430 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20431 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20432 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20433 __ psllq($dst$$XMMRegister, 32);
20434 // Get the lo-lo products
20435 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20436 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20437 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20438 %}
20439 ins_pipe( pipe_slow );
20440 %}
20441
20442 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20443 predicate(UseAVX > 0 &&
20444 ((Matcher::vector_length_in_bytes(n) == 64 &&
20445 !VM_Version::supports_avx512dq()) ||
20446 (Matcher::vector_length_in_bytes(n) < 64 &&
20447 !VM_Version::supports_avx512vldq())));
20448 match(Set dst (MulVL src1 src2));
20449 effect(TEMP xtmp1, TEMP xtmp2);
20450 ins_cost(500);
20451 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20452 ins_encode %{
20453 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products (same decomposition as vmulL above);
    // only their lower 32 bits are of concern
20455 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20456 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20457 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20458 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20459 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20460 // Get the lo-lo products
20461 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20462 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20463 %}
20464 ins_pipe( pipe_slow );
20465 %}
20466
20467 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20468 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20469 match(Set dst (MulVL src1 src2));
20470 ins_cost(100);
20471 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20472 ins_encode %{
20473 int vlen_enc = vector_length_encoding(this);
20474 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20475 %}
20476 ins_pipe( pipe_slow );
20477 %}
20478
20479 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20480 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20481 match(Set dst (MulVL src1 src2));
20482 ins_cost(100);
20483 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20484 ins_encode %{
20485 int vlen_enc = vector_length_encoding(this);
20486 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20487 %}
20488 ins_pipe( pipe_slow );
20489 %}
20490
20491 // Floats vector mul
20492 instruct vmulF(vec dst, vec src) %{
20493 predicate(UseAVX == 0);
20494 match(Set dst (MulVF dst src));
20495 format %{ "mulps $dst,$src\t! mul packedF" %}
20496 ins_encode %{
20497 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20498 %}
20499 ins_pipe( pipe_slow );
20500 %}
20501
20502 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20503 predicate(UseAVX > 0);
20504 match(Set dst (MulVF src1 src2));
20505 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20506 ins_encode %{
20507 int vlen_enc = vector_length_encoding(this);
20508 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20509 %}
20510 ins_pipe( pipe_slow );
20511 %}
20512
20513 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20514 predicate((UseAVX > 0) &&
20515 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20516 match(Set dst (MulVF src (LoadVector mem)));
20517 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20518 ins_encode %{
20519 int vlen_enc = vector_length_encoding(this);
20520 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20521 %}
20522 ins_pipe( pipe_slow );
20523 %}
20524
20525 // Doubles vector mul
20526 instruct vmulD(vec dst, vec src) %{
20527 predicate(UseAVX == 0);
20528 match(Set dst (MulVD dst src));
20529 format %{ "mulpd $dst,$src\t! mul packedD" %}
20530 ins_encode %{
20531 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20532 %}
20533 ins_pipe( pipe_slow );
20534 %}
20535
20536 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20537 predicate(UseAVX > 0);
20538 match(Set dst (MulVD src1 src2));
20539 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20540 ins_encode %{
20541 int vlen_enc = vector_length_encoding(this);
20542 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20543 %}
20544 ins_pipe( pipe_slow );
20545 %}
20546
20547 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20548 predicate((UseAVX > 0) &&
20549 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20550 match(Set dst (MulVD src (LoadVector mem)));
20551 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20552 ins_encode %{
20553 int vlen_enc = vector_length_encoding(this);
20554 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20555 %}
20556 ins_pipe( pipe_slow );
20557 %}
20558
20559 // --------------------------------- DIV --------------------------------------
20560
20561 // Floats vector div
20562 instruct vdivF(vec dst, vec src) %{
20563 predicate(UseAVX == 0);
20564 match(Set dst (DivVF dst src));
20565 format %{ "divps $dst,$src\t! div packedF" %}
20566 ins_encode %{
20567 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20568 %}
20569 ins_pipe( pipe_slow );
20570 %}
20571
20572 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20573 predicate(UseAVX > 0);
20574 match(Set dst (DivVF src1 src2));
20575 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20576 ins_encode %{
20577 int vlen_enc = vector_length_encoding(this);
20578 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20579 %}
20580 ins_pipe( pipe_slow );
20581 %}
20582
20583 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20584 predicate((UseAVX > 0) &&
20585 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20586 match(Set dst (DivVF src (LoadVector mem)));
20587 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20588 ins_encode %{
20589 int vlen_enc = vector_length_encoding(this);
20590 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20591 %}
20592 ins_pipe( pipe_slow );
20593 %}
20594
20595 // Doubles vector div
20596 instruct vdivD(vec dst, vec src) %{
20597 predicate(UseAVX == 0);
20598 match(Set dst (DivVD dst src));
20599 format %{ "divpd $dst,$src\t! div packedD" %}
20600 ins_encode %{
20601 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20602 %}
20603 ins_pipe( pipe_slow );
20604 %}
20605
20606 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20607 predicate(UseAVX > 0);
20608 match(Set dst (DivVD src1 src2));
20609 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20610 ins_encode %{
20611 int vlen_enc = vector_length_encoding(this);
20612 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20613 %}
20614 ins_pipe( pipe_slow );
20615 %}
20616
20617 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20618 predicate((UseAVX > 0) &&
20619 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20620 match(Set dst (DivVD src (LoadVector mem)));
20621 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20622 ins_encode %{
20623 int vlen_enc = vector_length_encoding(this);
20624 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20625 %}
20626 ins_pipe( pipe_slow );
20627 %}
20628
20629 // ------------------------------ MinMax ---------------------------------------
20630
20631 // Byte, Short, Int vector Min/Max
20632 instruct minmax_reg_sse(vec dst, vec src) %{
20633 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20634 UseAVX == 0);
20635 match(Set dst (MinV dst src));
20636 match(Set dst (MaxV dst src));
20637 format %{ "vector_minmax $dst,$src\t! " %}
20638 ins_encode %{
20639 assert(UseSSE >= 4, "required");
20640
20641 int opcode = this->ideal_Opcode();
20642 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20643 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20644 %}
20645 ins_pipe( pipe_slow );
20646 %}
20647
20648 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20649 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20650 UseAVX > 0);
20651 match(Set dst (MinV src1 src2));
20652 match(Set dst (MaxV src1 src2));
20653 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20654 ins_encode %{
20655 int opcode = this->ideal_Opcode();
20656 int vlen_enc = vector_length_encoding(this);
20657 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20658
20659 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20660 %}
20661 ins_pipe( pipe_slow );
20662 %}
20663
20664 // Long vector Min/Max
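// Note: the non-VEX blendv* instructions take their selector implicitly
// in xmm0, which is why the temp register below is pinned to rxmm0.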
20665 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20666 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20667 UseAVX == 0);
20668 match(Set dst (MinV dst src));
20669 match(Set dst (MaxV src dst));
20670 effect(TEMP dst, TEMP tmp);
20671 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20672 ins_encode %{
20673 assert(UseSSE >= 4, "required");
20674
20675 int opcode = this->ideal_Opcode();
20676 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20677 assert(elem_bt == T_LONG, "sanity");
20678
20679 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20680 %}
20681 ins_pipe( pipe_slow );
20682 %}
20683
20684 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20685 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20686 UseAVX > 0 && !VM_Version::supports_avx512vl());
20687 match(Set dst (MinV src1 src2));
20688 match(Set dst (MaxV src1 src2));
20689 effect(TEMP dst);
20690 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20691 ins_encode %{
20692 int vlen_enc = vector_length_encoding(this);
20693 int opcode = this->ideal_Opcode();
20694 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20695 assert(elem_bt == T_LONG, "sanity");
20696
20697 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20698 %}
20699 ins_pipe( pipe_slow );
20700 %}
20701
20702 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20703 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20704 Matcher::vector_element_basic_type(n) == T_LONG);
20705 match(Set dst (MinV src1 src2));
20706 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20708 ins_encode %{
20709 assert(UseAVX > 2, "required");
20710
20711 int vlen_enc = vector_length_encoding(this);
20712 int opcode = this->ideal_Opcode();
20713 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20714 assert(elem_bt == T_LONG, "sanity");
20715
20716 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20717 %}
20718 ins_pipe( pipe_slow );
20719 %}
20720
20721 // Float/Double vector Min/Max
20722 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20723 predicate(VM_Version::supports_avx10_2() &&
20724 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20725 match(Set dst (MinV a b));
20726 match(Set dst (MaxV a b));
20727 format %{ "vector_minmaxFP $dst, $a, $b" %}
20728 ins_encode %{
20729 int vlen_enc = vector_length_encoding(this);
20730 int opcode = this->ideal_Opcode();
20731 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20732 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20733 %}
20734 ins_pipe( pipe_slow );
20735 %}
20736
20737 // Float/Double vector Min/Max
20738 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20739 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20740 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20741 UseAVX > 0);
20742 match(Set dst (MinV a b));
20743 match(Set dst (MaxV a b));
20744 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20745 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20746 ins_encode %{
20747 assert(UseAVX > 0, "required");
20748
20749 int opcode = this->ideal_Opcode();
20750 int vlen_enc = vector_length_encoding(this);
20751 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20752
20753 __ vminmax_fp(opcode, elem_bt,
20754 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20756 %}
20757 ins_pipe( pipe_slow );
20758 %}
20759
20760 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20761 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20762 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20763 match(Set dst (MinV a b));
20764 match(Set dst (MaxV a b));
20765 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20766 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20767 ins_encode %{
20768 assert(UseAVX > 2, "required");
20769
20770 int opcode = this->ideal_Opcode();
20771 int vlen_enc = vector_length_encoding(this);
20772 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20773
20774 __ evminmax_fp(opcode, elem_bt,
20775 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20777 %}
20778 ins_pipe( pipe_slow );
20779 %}
20780
20781 // ------------------------------ Unsigned vector Min/Max ----------------------
20782
20783 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20784 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20785 match(Set dst (UMinV a b));
20786 match(Set dst (UMaxV a b));
20787 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20788 ins_encode %{
20789 int opcode = this->ideal_Opcode();
20790 int vlen_enc = vector_length_encoding(this);
20791 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20792 assert(is_integral_type(elem_bt), "");
20793 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20794 %}
20795 ins_pipe( pipe_slow );
20796 %}
20797
20798 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20799 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20800 match(Set dst (UMinV a (LoadVector b)));
20801 match(Set dst (UMaxV a (LoadVector b)));
20802 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20803 ins_encode %{
20804 int opcode = this->ideal_Opcode();
20805 int vlen_enc = vector_length_encoding(this);
20806 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20807 assert(is_integral_type(elem_bt), "");
20808 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20809 %}
20810 ins_pipe( pipe_slow );
20811 %}
20812
20813 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20814 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20815 match(Set dst (UMinV a b));
20816 match(Set dst (UMaxV a b));
20817 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20819 ins_encode %{
20820 int opcode = this->ideal_Opcode();
20821 int vlen_enc = vector_length_encoding(this);
20822 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20823 %}
20824 ins_pipe( pipe_slow );
20825 %}
20826
20827 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20828 match(Set dst (UMinV (Binary dst src2) mask));
20829 match(Set dst (UMaxV (Binary dst src2) mask));
20830 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20831 ins_encode %{
20832 int vlen_enc = vector_length_encoding(this);
20833 BasicType bt = Matcher::vector_element_basic_type(this);
20834 int opc = this->ideal_Opcode();
20835 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20836 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20837 %}
20838 ins_pipe( pipe_slow );
20839 %}
20840
20841 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20842 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20843 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20844 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20845 ins_encode %{
20846 int vlen_enc = vector_length_encoding(this);
20847 BasicType bt = Matcher::vector_element_basic_type(this);
20848 int opc = this->ideal_Opcode();
20849 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20850 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20851 %}
20852 ins_pipe( pipe_slow );
20853 %}
20854
20855 // --------------------------------- Signum/CopySign ---------------------------
20856
20857 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20858 match(Set dst (SignumF dst (Binary zero one)));
20859 effect(KILL cr);
20860 format %{ "signumF $dst, $dst" %}
20861 ins_encode %{
20862 int opcode = this->ideal_Opcode();
20863 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20864 %}
20865 ins_pipe( pipe_slow );
20866 %}
20867
20868 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20869 match(Set dst (SignumD dst (Binary zero one)));
20870 effect(KILL cr);
20871 format %{ "signumD $dst, $dst" %}
20872 ins_encode %{
20873 int opcode = this->ideal_Opcode();
20874 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20875 %}
20876 ins_pipe( pipe_slow );
20877 %}
20878
20879 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20880 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20881 match(Set dst (SignumVF src (Binary zero one)));
20882 match(Set dst (SignumVD src (Binary zero one)));
20883 effect(TEMP dst, TEMP xtmp1);
20884 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20885 ins_encode %{
20886 int opcode = this->ideal_Opcode();
20887 int vec_enc = vector_length_encoding(this);
20888 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20889 $xtmp1$$XMMRegister, vec_enc);
20890 %}
20891 ins_pipe( pipe_slow );
20892 %}
20893
20894 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20895 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20896 match(Set dst (SignumVF src (Binary zero one)));
20897 match(Set dst (SignumVD src (Binary zero one)));
20898 effect(TEMP dst, TEMP ktmp1);
20899 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20900 ins_encode %{
20901 int opcode = this->ideal_Opcode();
20902 int vec_enc = vector_length_encoding(this);
20903 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20904 $ktmp1$$KRegister, vec_enc);
20905 %}
20906 ins_pipe( pipe_slow );
20907 %}
20908
20909 // ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
20911 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20912 // C (xmm2) is set to 0x7FFFFFFF
20913 // Wherever xmm2 is 0, we want to pick from B (sign)
20914 // Wherever xmm2 is 1, we want to pick from A (src)
20915 //
20916 // A B C Result
20917 // 0 0 0 0
20918 // 0 0 1 0
20919 // 0 1 0 1
20920 // 0 1 1 0
20921 // 1 0 0 0
20922 // 1 0 1 1
20923 // 1 1 0 1
20924 // 1 1 1 1
20925 //
// Result going from high bit to low bit is 0b11100100 = 0xe4
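// i.e. 0xE4 encodes the bitwise select (C ? A : B) = (A & C) | (B & ~C)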
20927 // ---------------------------------------
20928
20929 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20930 match(Set dst (CopySignF dst src));
20931 effect(TEMP tmp1, TEMP tmp2);
20932 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20933 ins_encode %{
20934 __ movl($tmp2$$Register, 0x7FFFFFFF);
20935 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20936 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20937 %}
20938 ins_pipe( pipe_slow );
20939 %}
20940
20941 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20942 match(Set dst (CopySignD dst (Binary src zero)));
20943 ins_cost(100);
20944 effect(TEMP tmp1, TEMP tmp2);
20945 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20946 ins_encode %{
20947 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20948 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20949 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20950 %}
20951 ins_pipe( pipe_slow );
20952 %}
20953
20954 //----------------------------- CompressBits/ExpandBits ------------------------
20955
20956 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20957 predicate(n->bottom_type()->isa_int());
20958 match(Set dst (CompressBits src mask));
20959 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20960 ins_encode %{
20961 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20962 %}
20963 ins_pipe( pipe_slow );
20964 %}
20965
20966 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20967 predicate(n->bottom_type()->isa_int());
20968 match(Set dst (ExpandBits src mask));
20969 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20970 ins_encode %{
20971 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20972 %}
20973 ins_pipe( pipe_slow );
20974 %}
20975
20976 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20977 predicate(n->bottom_type()->isa_int());
20978 match(Set dst (CompressBits src (LoadI mask)));
20979 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
20980 ins_encode %{
20981 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20982 %}
20983 ins_pipe( pipe_slow );
20984 %}
20985
20986 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20987 predicate(n->bottom_type()->isa_int());
20988 match(Set dst (ExpandBits src (LoadI mask)));
20989 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
20990 ins_encode %{
20991 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20992 %}
20993 ins_pipe( pipe_slow );
20994 %}
20995
20996 // --------------------------------- Sqrt --------------------------------------
20997
20998 instruct vsqrtF_reg(vec dst, vec src) %{
20999 match(Set dst (SqrtVF src));
21000 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21001 ins_encode %{
21002 assert(UseAVX > 0, "required");
21003 int vlen_enc = vector_length_encoding(this);
21004 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21005 %}
21006 ins_pipe( pipe_slow );
21007 %}
21008
21009 instruct vsqrtF_mem(vec dst, memory mem) %{
21010 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21011 match(Set dst (SqrtVF (LoadVector mem)));
21012 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21013 ins_encode %{
21014 assert(UseAVX > 0, "required");
21015 int vlen_enc = vector_length_encoding(this);
21016 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21017 %}
21018 ins_pipe( pipe_slow );
21019 %}
21020
21021 // Floating point vector sqrt
21022 instruct vsqrtD_reg(vec dst, vec src) %{
21023 match(Set dst (SqrtVD src));
21024 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21025 ins_encode %{
21026 assert(UseAVX > 0, "required");
21027 int vlen_enc = vector_length_encoding(this);
21028 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21029 %}
21030 ins_pipe( pipe_slow );
21031 %}
21032
21033 instruct vsqrtD_mem(vec dst, memory mem) %{
21034 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21035 match(Set dst (SqrtVD (LoadVector mem)));
21036 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21037 ins_encode %{
21038 assert(UseAVX > 0, "required");
21039 int vlen_enc = vector_length_encoding(this);
21040 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21041 %}
21042 ins_pipe( pipe_slow );
21043 %}
21044
21045 // ------------------------------ Shift ---------------------------------------
21046
21047 // Left and right shift count vectors are the same on x86
21048 // (only lowest bits of xmm reg are used for count).
21049 instruct vshiftcnt(vec dst, rRegI cnt) %{
21050 match(Set dst (LShiftCntV cnt));
21051 match(Set dst (RShiftCntV cnt));
21052 format %{ "movdl $dst,$cnt\t! load shift count" %}
21053 ins_encode %{
21054 __ movdl($dst$$XMMRegister, $cnt$$Register);
21055 %}
21056 ins_pipe( pipe_slow );
21057 %}
21058
21059 // Byte vector shift
21060 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21061 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21062 match(Set dst ( LShiftVB src shift));
21063 match(Set dst ( RShiftVB src shift));
21064 match(Set dst (URShiftVB src shift));
21065 effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21067 ins_encode %{
21068 assert(UseSSE > 3, "required");
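    // SSE has no byte-wise shifts; widen the bytes to words, shift as
    // words, then mask each word to its low byte and pack back to bytes.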
21069 int opcode = this->ideal_Opcode();
21070 bool sign = (opcode != Op_URShiftVB);
21071 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21072 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21073 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21074 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21075 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21076 %}
21077 ins_pipe( pipe_slow );
21078 %}
21079
21080 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21081 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21082 UseAVX <= 1);
21083 match(Set dst ( LShiftVB src shift));
21084 match(Set dst ( RShiftVB src shift));
21085 match(Set dst (URShiftVB src shift));
21086 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21088 ins_encode %{
21089 assert(UseSSE > 3, "required");
21090 int opcode = this->ideal_Opcode();
21091 bool sign = (opcode != Op_URShiftVB);
21092 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21093 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21094 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21095 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21096 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21097 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21098 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21099 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21100 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21101 %}
21102 ins_pipe( pipe_slow );
21103 %}
21104
21105 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21106 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21107 UseAVX > 1);
21108 match(Set dst ( LShiftVB src shift));
21109 match(Set dst ( RShiftVB src shift));
21110 match(Set dst (URShiftVB src shift));
21111 effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21113 ins_encode %{
21114 int opcode = this->ideal_Opcode();
21115 bool sign = (opcode != Op_URShiftVB);
21116 int vlen_enc = Assembler::AVX_256bit;
21117 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21118 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21119 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21120 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21121 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21122 %}
21123 ins_pipe( pipe_slow );
21124 %}
21125
21126 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21127 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21128 match(Set dst ( LShiftVB src shift));
21129 match(Set dst ( RShiftVB src shift));
21130 match(Set dst (URShiftVB src shift));
21131 effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21133 ins_encode %{
21134 assert(UseAVX > 1, "required");
21135 int opcode = this->ideal_Opcode();
21136 bool sign = (opcode != Op_URShiftVB);
21137 int vlen_enc = Assembler::AVX_256bit;
21138 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21139 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21140 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21141 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21142 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21143 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21144 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21145 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
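    // 256-bit vpackuswb packs within each 128-bit lane, leaving the two
    // packed halves interleaved; vpermq with 0xD8 restores element order.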
21146 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21147 %}
21148 ins_pipe( pipe_slow );
21149 %}
21150
21151 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21152 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21153 match(Set dst ( LShiftVB src shift));
21154 match(Set dst (RShiftVB src shift));
21155 match(Set dst (URShiftVB src shift));
21156 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
21158 ins_encode %{
21159 assert(UseAVX > 2, "required");
21160 int opcode = this->ideal_Opcode();
21161 bool sign = (opcode != Op_URShiftVB);
21162 int vlen_enc = Assembler::AVX_512bit;
21163 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21164 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21165 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21166 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21167 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21168 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21169 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21170 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21171 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21172 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21173 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21174 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21175 %}
21176 ins_pipe( pipe_slow );
21177 %}
21178
// Shorts vector logical right shift would produce an incorrect Java result
// for negative data, because Java code converts short values to int with
// sign extension before shifting. Char vectors are fine, since chars are
// unsigned values.
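// For example, in Java (short)0x8000 >>> 2 promotes the operand to the int
// 0xFFFF8000 and yields 0x3FFFE000, while a plain 16-bit lane shift would
// yield 0x2000.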
// Shorts/Chars vector shift
21184 instruct vshiftS(vec dst, vec src, vec shift) %{
21185 predicate(!n->as_ShiftV()->is_var_shift());
21186 match(Set dst ( LShiftVS src shift));
21187 match(Set dst ( RShiftVS src shift));
21188 match(Set dst (URShiftVS src shift));
21189 effect(TEMP dst, USE src, USE shift);
21190 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21191 ins_encode %{
21192 int opcode = this->ideal_Opcode();
21193 if (UseAVX > 0) {
21194 int vlen_enc = vector_length_encoding(this);
21195 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21196 } else {
21197 int vlen = Matcher::vector_length(this);
21198 if (vlen == 2) {
21199 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21200 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21201 } else if (vlen == 4) {
21202 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21203 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21204 } else {
21205 assert (vlen == 8, "sanity");
21206 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21207 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21208 }
21209 }
21210 %}
21211 ins_pipe( pipe_slow );
21212 %}
21213
// Integers vector shift
21215 instruct vshiftI(vec dst, vec src, vec shift) %{
21216 predicate(!n->as_ShiftV()->is_var_shift());
21217 match(Set dst ( LShiftVI src shift));
21218 match(Set dst ( RShiftVI src shift));
21219 match(Set dst (URShiftVI src shift));
21220 effect(TEMP dst, USE src, USE shift);
21221 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21222 ins_encode %{
21223 int opcode = this->ideal_Opcode();
21224 if (UseAVX > 0) {
21225 int vlen_enc = vector_length_encoding(this);
21226 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21227 } else {
21228 int vlen = Matcher::vector_length(this);
21229 if (vlen == 2) {
21230 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21231 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21232 } else {
21233 assert(vlen == 4, "sanity");
21234 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21235 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21236 }
21237 }
21238 %}
21239 ins_pipe( pipe_slow );
21240 %}
21241
// Integers vector constant shift
21243 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21244 match(Set dst (LShiftVI src (LShiftCntV shift)));
21245 match(Set dst (RShiftVI src (RShiftCntV shift)));
21246 match(Set dst (URShiftVI src (RShiftCntV shift)));
21247 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21248 ins_encode %{
21249 int opcode = this->ideal_Opcode();
21250 if (UseAVX > 0) {
21251 int vector_len = vector_length_encoding(this);
21252 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21253 } else {
21254 int vlen = Matcher::vector_length(this);
21255 if (vlen == 2) {
21256 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21257 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21258 } else {
21259 assert(vlen == 4, "sanity");
21260 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21261 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21262 }
21263 }
21264 %}
21265 ins_pipe( pipe_slow );
21266 %}
21267
21268 // Longs vector shift
21269 instruct vshiftL(vec dst, vec src, vec shift) %{
21270 predicate(!n->as_ShiftV()->is_var_shift());
21271 match(Set dst ( LShiftVL src shift));
21272 match(Set dst (URShiftVL src shift));
21273 effect(TEMP dst, USE src, USE shift);
21274 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21275 ins_encode %{
21276 int opcode = this->ideal_Opcode();
21277 if (UseAVX > 0) {
21278 int vlen_enc = vector_length_encoding(this);
21279 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21280 } else {
      assert(Matcher::vector_length(this) == 2, "sanity");
21282 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21283 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21284 }
21285 %}
21286 ins_pipe( pipe_slow );
21287 %}
21288
21289 // Longs vector constant shift
21290 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21291 match(Set dst (LShiftVL src (LShiftCntV shift)));
21292 match(Set dst (URShiftVL src (RShiftCntV shift)));
21293 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21294 ins_encode %{
21295 int opcode = this->ideal_Opcode();
21296 if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vlen_enc);
21299 } else {
      assert(Matcher::vector_length(this) == 2, "sanity");
21301 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21302 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21303 }
21304 %}
21305 ins_pipe( pipe_slow );
21306 %}
21307
21308 // -------------------ArithmeticRightShift -----------------------------------
21309 // Long vector arithmetic right shift
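// SSE/AVX2 have no arithmetic right shift for 64-bit lanes (vpsraq is
// AVX-512 only), so it is synthesized from the logical shift: with
// m = (0x8000000000000000 >>> s), ((x >>> s) ^ m) - m sign-extends the
// result. For example, x = -8, s = 1: x >>> 1 = 0x7FFF...FC,
// m = 0x4000...00, and (0x7FFF...FC ^ m) - m = 0xFFFF...FC = -4.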
21310 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21311 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21312 match(Set dst (RShiftVL src shift));
21313 effect(TEMP dst, TEMP tmp);
21314 format %{ "vshiftq $dst,$src,$shift" %}
21315 ins_encode %{
21316 uint vlen = Matcher::vector_length(this);
21317 if (vlen == 2) {
21318 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21319 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21320 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21321 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21322 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21323 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21324 } else {
21325 assert(vlen == 4, "sanity");
21326 assert(UseAVX > 1, "required");
21327 int vlen_enc = Assembler::AVX_256bit;
21328 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21329 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21330 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21331 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21332 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21333 }
21334 %}
21335 ins_pipe( pipe_slow );
21336 %}
21337
21338 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21339 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21340 match(Set dst (RShiftVL src shift));
21341 format %{ "vshiftq $dst,$src,$shift" %}
21342 ins_encode %{
21343 int vlen_enc = vector_length_encoding(this);
21344 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21345 %}
21346 ins_pipe( pipe_slow );
21347 %}
21348
21349 // ------------------- Variable Shift -----------------------------
21350 // Byte variable shift
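// There are no byte-granularity shift instructions at any AVX level, and
// AVX2 only provides 32-bit variable shifts, so byte shifts are emulated:
// varshiftbw widens the bytes, shifts at wider granularity, and leaves
// word-sized results that vpackuswb then narrows back to bytes.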
21351 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21352 predicate(Matcher::vector_length(n) <= 8 &&
21353 n->as_ShiftV()->is_var_shift() &&
21354 !VM_Version::supports_avx512bw());
21355 match(Set dst ( LShiftVB src shift));
21356 match(Set dst ( RShiftVB src shift));
21357 match(Set dst (URShiftVB src shift));
21358 effect(TEMP dst, TEMP vtmp);
21359 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21360 ins_encode %{
21361 assert(UseAVX >= 2, "required");
21362
21363 int opcode = this->ideal_Opcode();
21364 int vlen_enc = Assembler::AVX_128bit;
21365 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21366 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21367 %}
21368 ins_pipe( pipe_slow );
21369 %}
21370
21371 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21372 predicate(Matcher::vector_length(n) == 16 &&
21373 n->as_ShiftV()->is_var_shift() &&
21374 !VM_Version::supports_avx512bw());
21375 match(Set dst ( LShiftVB src shift));
21376 match(Set dst ( RShiftVB src shift));
21377 match(Set dst (URShiftVB src shift));
21378 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21379 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21380 ins_encode %{
21381 assert(UseAVX >= 2, "required");
21382
21383 int opcode = this->ideal_Opcode();
21384 int vlen_enc = Assembler::AVX_128bit;
21385 // Shift lower half and get word result in dst
21386 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21387
21388 // Shift upper half and get word result in vtmp1
21389 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21390 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21391 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21392
21393 // Merge and down convert the two word results to byte in dst
21394 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21395 %}
21396 ins_pipe( pipe_slow );
21397 %}
21398
21399 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21400 predicate(Matcher::vector_length(n) == 32 &&
21401 n->as_ShiftV()->is_var_shift() &&
21402 !VM_Version::supports_avx512bw());
21403 match(Set dst ( LShiftVB src shift));
21404 match(Set dst ( RShiftVB src shift));
21405 match(Set dst (URShiftVB src shift));
21406 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21408 ins_encode %{
21409 assert(UseAVX >= 2, "required");
21410
21411 int opcode = this->ideal_Opcode();
21412 int vlen_enc = Assembler::AVX_128bit;
21413 // Process lower 128 bits and get result in dst
21414 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21415 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21416 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21417 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21418 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21419
21420 // Process higher 128 bits and get result in vtmp3
21421 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21422 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21423 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21424 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21425 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21426 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21427 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21428
21429 // Merge the two results in dst
21430 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21431 %}
21432 ins_pipe( pipe_slow );
21433 %}
21434
21435 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21436 predicate(Matcher::vector_length(n) <= 32 &&
21437 n->as_ShiftV()->is_var_shift() &&
21438 VM_Version::supports_avx512bw());
21439 match(Set dst ( LShiftVB src shift));
21440 match(Set dst ( RShiftVB src shift));
21441 match(Set dst (URShiftVB src shift));
21442 effect(TEMP dst, TEMP vtmp);
21443 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21444 ins_encode %{
21445 assert(UseAVX > 2, "required");
21446
21447 int opcode = this->ideal_Opcode();
21448 int vlen_enc = vector_length_encoding(this);
21449 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21450 %}
21451 ins_pipe( pipe_slow );
21452 %}
21453
21454 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21455 predicate(Matcher::vector_length(n) == 64 &&
21456 n->as_ShiftV()->is_var_shift() &&
21457 VM_Version::supports_avx512bw());
21458 match(Set dst ( LShiftVB src shift));
21459 match(Set dst ( RShiftVB src shift));
21460 match(Set dst (URShiftVB src shift));
21461 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21462 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21463 ins_encode %{
21464 assert(UseAVX > 2, "required");
21465
21466 int opcode = this->ideal_Opcode();
21467 int vlen_enc = Assembler::AVX_256bit;
21468 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21469 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21470 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21471 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21472 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21473 %}
21474 ins_pipe( pipe_slow );
21475 %}
21476
21477 // Short variable shift
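// Variable 16-bit shifts (vpsllvw/vpsrlvw/vpsravw) require AVX-512BW.
// Without it, the shorts are sign- or zero-extended to dwords, shifted with
// the AVX2 32-bit variable shifts, masked back down to 16 significant bits,
// and re-packed with vpackusdw.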
21478 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21479 predicate(Matcher::vector_length(n) <= 8 &&
21480 n->as_ShiftV()->is_var_shift() &&
21481 !VM_Version::supports_avx512bw());
21482 match(Set dst ( LShiftVS src shift));
21483 match(Set dst ( RShiftVS src shift));
21484 match(Set dst (URShiftVS src shift));
21485 effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21487 ins_encode %{
21488 assert(UseAVX >= 2, "required");
21489
21490 int opcode = this->ideal_Opcode();
21491 bool sign = (opcode != Op_URShiftVS);
21492 int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21495 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21496 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21497 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21498 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21499 %}
21500 ins_pipe( pipe_slow );
21501 %}
21502
21503 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21504 predicate(Matcher::vector_length(n) == 16 &&
21505 n->as_ShiftV()->is_var_shift() &&
21506 !VM_Version::supports_avx512bw());
21507 match(Set dst ( LShiftVS src shift));
21508 match(Set dst ( RShiftVS src shift));
21509 match(Set dst (URShiftVS src shift));
21510 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst,$src,$shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21512 ins_encode %{
21513 assert(UseAVX >= 2, "required");
21514
21515 int opcode = this->ideal_Opcode();
21516 bool sign = (opcode != Op_URShiftVS);
21517 int vlen_enc = Assembler::AVX_256bit;
21518 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21519 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21520 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21521 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21522 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21523
21524 // Shift upper half, with result in dst using vtmp1 as TEMP
21525 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21526 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21527 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21528 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21529 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21530 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21531
21532 // Merge lower and upper half result into dst
21533 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21534 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21535 %}
21536 ins_pipe( pipe_slow );
21537 %}
21538
21539 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21540 predicate(n->as_ShiftV()->is_var_shift() &&
21541 VM_Version::supports_avx512bw());
21542 match(Set dst ( LShiftVS src shift));
21543 match(Set dst ( RShiftVS src shift));
21544 match(Set dst (URShiftVS src shift));
21545 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21546 ins_encode %{
21547 assert(UseAVX > 2, "required");
21548
21549 int opcode = this->ideal_Opcode();
21550 int vlen_enc = vector_length_encoding(this);
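    // Without AVX-512VL the word shift is only encodable at 512-bit width;
    // the extra upper lanes are don't-cares.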
21551 if (!VM_Version::supports_avx512vl()) {
21552 vlen_enc = Assembler::AVX_512bit;
21553 }
21554 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21555 %}
21556 ins_pipe( pipe_slow );
21557 %}
21558
// Integer variable shift
21560 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21561 predicate(n->as_ShiftV()->is_var_shift());
21562 match(Set dst ( LShiftVI src shift));
21563 match(Set dst ( RShiftVI src shift));
21564 match(Set dst (URShiftVI src shift));
21565 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21566 ins_encode %{
21567 assert(UseAVX >= 2, "required");
21568
21569 int opcode = this->ideal_Opcode();
21570 int vlen_enc = vector_length_encoding(this);
21571 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21572 %}
21573 ins_pipe( pipe_slow );
21574 %}
21575
// Long variable shift
21577 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21578 predicate(n->as_ShiftV()->is_var_shift());
21579 match(Set dst ( LShiftVL src shift));
21580 match(Set dst (URShiftVL src shift));
21581 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21582 ins_encode %{
21583 assert(UseAVX >= 2, "required");
21584
21585 int opcode = this->ideal_Opcode();
21586 int vlen_enc = vector_length_encoding(this);
21587 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21588 %}
21589 ins_pipe( pipe_slow );
21590 %}
21591
// Long variable arithmetic right shift
21593 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21594 predicate(Matcher::vector_length(n) <= 4 &&
21595 n->as_ShiftV()->is_var_shift() &&
21596 UseAVX == 2);
21597 match(Set dst (RShiftVL src shift));
21598 effect(TEMP dst, TEMP vtmp);
21599 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21600 ins_encode %{
21601 int opcode = this->ideal_Opcode();
21602 int vlen_enc = vector_length_encoding(this);
21603 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21604 $vtmp$$XMMRegister);
21605 %}
21606 ins_pipe( pipe_slow );
21607 %}
21608
21609 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21610 predicate(n->as_ShiftV()->is_var_shift() &&
21611 UseAVX > 2);
21612 match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21614 ins_encode %{
21615 int opcode = this->ideal_Opcode();
21616 int vlen_enc = vector_length_encoding(this);
21617 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21618 %}
21619 ins_pipe( pipe_slow );
21620 %}
21621
21622 // --------------------------------- AND --------------------------------------
21623
21624 instruct vand(vec dst, vec src) %{
21625 predicate(UseAVX == 0);
21626 match(Set dst (AndV dst src));
21627 format %{ "pand $dst,$src\t! and vectors" %}
21628 ins_encode %{
21629 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21630 %}
21631 ins_pipe( pipe_slow );
21632 %}
21633
21634 instruct vand_reg(vec dst, vec src1, vec src2) %{
21635 predicate(UseAVX > 0);
21636 match(Set dst (AndV src1 src2));
21637 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21638 ins_encode %{
21639 int vlen_enc = vector_length_encoding(this);
21640 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21641 %}
21642 ins_pipe( pipe_slow );
21643 %}
21644
21645 instruct vand_mem(vec dst, vec src, memory mem) %{
21646 predicate((UseAVX > 0) &&
21647 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21648 match(Set dst (AndV src (LoadVector mem)));
21649 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21650 ins_encode %{
21651 int vlen_enc = vector_length_encoding(this);
21652 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21653 %}
21654 ins_pipe( pipe_slow );
21655 %}
21656
21657 // --------------------------------- OR ---------------------------------------
21658
21659 instruct vor(vec dst, vec src) %{
21660 predicate(UseAVX == 0);
21661 match(Set dst (OrV dst src));
21662 format %{ "por $dst,$src\t! or vectors" %}
21663 ins_encode %{
21664 __ por($dst$$XMMRegister, $src$$XMMRegister);
21665 %}
21666 ins_pipe( pipe_slow );
21667 %}
21668
21669 instruct vor_reg(vec dst, vec src1, vec src2) %{
21670 predicate(UseAVX > 0);
21671 match(Set dst (OrV src1 src2));
21672 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21673 ins_encode %{
21674 int vlen_enc = vector_length_encoding(this);
21675 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21676 %}
21677 ins_pipe( pipe_slow );
21678 %}
21679
21680 instruct vor_mem(vec dst, vec src, memory mem) %{
21681 predicate((UseAVX > 0) &&
21682 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21683 match(Set dst (OrV src (LoadVector mem)));
21684 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21685 ins_encode %{
21686 int vlen_enc = vector_length_encoding(this);
21687 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21688 %}
21689 ins_pipe( pipe_slow );
21690 %}
21691
21692 // --------------------------------- XOR --------------------------------------
21693
21694 instruct vxor(vec dst, vec src) %{
21695 predicate(UseAVX == 0);
21696 match(Set dst (XorV dst src));
21697 format %{ "pxor $dst,$src\t! xor vectors" %}
21698 ins_encode %{
21699 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21700 %}
21701 ins_pipe( pipe_slow );
21702 %}
21703
21704 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21705 predicate(UseAVX > 0);
21706 match(Set dst (XorV src1 src2));
21707 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21708 ins_encode %{
21709 int vlen_enc = vector_length_encoding(this);
21710 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21711 %}
21712 ins_pipe( pipe_slow );
21713 %}
21714
21715 instruct vxor_mem(vec dst, vec src, memory mem) %{
21716 predicate((UseAVX > 0) &&
21717 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21718 match(Set dst (XorV src (LoadVector mem)));
21719 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21720 ins_encode %{
21721 int vlen_enc = vector_length_encoding(this);
21722 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21723 %}
21724 ins_pipe( pipe_slow );
21725 %}
21726
21727 // --------------------------------- VectorCast --------------------------------------
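// Conventions in this block: narrowing casts on AVX2 mask each element down
// to the target width and pack (vpackusdw/vpackuswb), while AVX-512 uses the
// truncating evpmov* moves; widening casts use the sign-extending vpmovsx*
// family; casts to and from floating point go through the cvt* conversions.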
21728
21729 instruct vcastBtoX(vec dst, vec src) %{
21730 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21731 match(Set dst (VectorCastB2X src));
21732 format %{ "vector_cast_b2x $dst,$src\t!" %}
21733 ins_encode %{
21734 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21735 int vlen_enc = vector_length_encoding(this);
21736 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21737 %}
21738 ins_pipe( pipe_slow );
21739 %}
21740
21741 instruct vcastBtoD(legVec dst, legVec src) %{
21742 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21743 match(Set dst (VectorCastB2X src));
21744 format %{ "vector_cast_b2x $dst,$src\t!" %}
21745 ins_encode %{
21746 int vlen_enc = vector_length_encoding(this);
21747 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21748 %}
21749 ins_pipe( pipe_slow );
21750 %}
21751
21752 instruct castStoX(vec dst, vec src) %{
21753 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21754 Matcher::vector_length(n->in(1)) <= 8 && // src
21755 Matcher::vector_element_basic_type(n) == T_BYTE);
21756 match(Set dst (VectorCastS2X src));
21757 format %{ "vector_cast_s2x $dst,$src" %}
21758 ins_encode %{
21759 assert(UseAVX > 0, "required");
21760
21761 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21762 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21763 %}
21764 ins_pipe( pipe_slow );
21765 %}
21766
21767 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21768 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21769 Matcher::vector_length(n->in(1)) == 16 && // src
21770 Matcher::vector_element_basic_type(n) == T_BYTE);
21771 effect(TEMP dst, TEMP vtmp);
21772 match(Set dst (VectorCastS2X src));
21773 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21774 ins_encode %{
21775 assert(UseAVX > 0, "required");
21776
21777 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21778 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21779 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21780 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21781 %}
21782 ins_pipe( pipe_slow );
21783 %}
21784
21785 instruct vcastStoX_evex(vec dst, vec src) %{
21786 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21787 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21788 match(Set dst (VectorCastS2X src));
21789 format %{ "vector_cast_s2x $dst,$src\t!" %}
21790 ins_encode %{
21791 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21792 int src_vlen_enc = vector_length_encoding(this, $src);
21793 int vlen_enc = vector_length_encoding(this);
21794 switch (to_elem_bt) {
21795 case T_BYTE:
21796 if (!VM_Version::supports_avx512vl()) {
21797 vlen_enc = Assembler::AVX_512bit;
21798 }
21799 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21800 break;
21801 case T_INT:
21802 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21803 break;
21804 case T_FLOAT:
21805 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21806 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21807 break;
21808 case T_LONG:
21809 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21810 break;
21811 case T_DOUBLE: {
21812 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21813 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21814 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21815 break;
21816 }
21817 default:
21818 ShouldNotReachHere();
21819 }
21820 %}
21821 ins_pipe( pipe_slow );
21822 %}
21823
21824 instruct castItoX(vec dst, vec src) %{
21825 predicate(UseAVX <= 2 &&
21826 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21827 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21828 match(Set dst (VectorCastI2X src));
21829 format %{ "vector_cast_i2x $dst,$src" %}
21830 ins_encode %{
21831 assert(UseAVX > 0, "required");
21832
21833 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21834 int vlen_enc = vector_length_encoding(this, $src);
21835
21836 if (to_elem_bt == T_BYTE) {
21837 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21838 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21839 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21840 } else {
21841 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21842 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21843 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21844 }
21845 %}
21846 ins_pipe( pipe_slow );
21847 %}
21848
21849 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21850 predicate(UseAVX <= 2 &&
21851 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21852 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21853 match(Set dst (VectorCastI2X src));
21854 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21855 effect(TEMP dst, TEMP vtmp);
21856 ins_encode %{
21857 assert(UseAVX > 0, "required");
21858
21859 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21860 int vlen_enc = vector_length_encoding(this, $src);
21861
21862 if (to_elem_bt == T_BYTE) {
21863 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21864 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21865 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21866 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21867 } else {
21868 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21869 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21870 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21871 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21872 }
21873 %}
21874 ins_pipe( pipe_slow );
21875 %}
21876
21877 instruct vcastItoX_evex(vec dst, vec src) %{
21878 predicate(UseAVX > 2 ||
21879 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21880 match(Set dst (VectorCastI2X src));
21881 format %{ "vector_cast_i2x $dst,$src\t!" %}
21882 ins_encode %{
21883 assert(UseAVX > 0, "required");
21884
21885 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21886 int src_vlen_enc = vector_length_encoding(this, $src);
21887 int dst_vlen_enc = vector_length_encoding(this);
21888 switch (dst_elem_bt) {
21889 case T_BYTE:
21890 if (!VM_Version::supports_avx512vl()) {
21891 src_vlen_enc = Assembler::AVX_512bit;
21892 }
21893 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21894 break;
21895 case T_SHORT:
21896 if (!VM_Version::supports_avx512vl()) {
21897 src_vlen_enc = Assembler::AVX_512bit;
21898 }
21899 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21900 break;
21901 case T_FLOAT:
21902 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21903 break;
21904 case T_LONG:
21905 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21906 break;
21907 case T_DOUBLE:
21908 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21909 break;
21910 default:
21911 ShouldNotReachHere();
21912 }
21913 %}
21914 ins_pipe( pipe_slow );
21915 %}
21916
21917 instruct vcastLtoBS(vec dst, vec src) %{
21918 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21919 UseAVX <= 2);
21920 match(Set dst (VectorCastL2X src));
21921 format %{ "vector_cast_l2x $dst,$src" %}
21922 ins_encode %{
21923 assert(UseAVX > 0, "required");
21924
21925 int vlen = Matcher::vector_length_in_bytes(this, $src);
21926 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21927 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21928 : ExternalAddress(vector_int_to_short_mask());
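    // The shuffles below select the low dword of each long (immediate 8 is
    // the lane pattern {0,2,0,0}); masking and packing then narrow those
    // dwords to the target element size.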
21929 if (vlen <= 16) {
21930 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21931 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21932 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21933 } else {
21934 assert(vlen <= 32, "required");
21935 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21936 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21937 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21938 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21939 }
21940 if (to_elem_bt == T_BYTE) {
21941 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21942 }
21943 %}
21944 ins_pipe( pipe_slow );
21945 %}
21946
21947 instruct vcastLtoX_evex(vec dst, vec src) %{
21948 predicate(UseAVX > 2 ||
21949 (Matcher::vector_element_basic_type(n) == T_INT ||
21950 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21951 Matcher::vector_element_basic_type(n) == T_DOUBLE));
21952 match(Set dst (VectorCastL2X src));
21953 format %{ "vector_cast_l2x $dst,$src\t!" %}
21954 ins_encode %{
21955 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21956 int vlen = Matcher::vector_length_in_bytes(this, $src);
21957 int vlen_enc = vector_length_encoding(this, $src);
21958 switch (to_elem_bt) {
21959 case T_BYTE:
21960 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21961 vlen_enc = Assembler::AVX_512bit;
21962 }
21963 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21964 break;
21965 case T_SHORT:
21966 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21967 vlen_enc = Assembler::AVX_512bit;
21968 }
21969 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21970 break;
21971 case T_INT:
21972 if (vlen == 8) {
21973 if ($dst$$XMMRegister != $src$$XMMRegister) {
21974 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21975 }
21976 } else if (vlen == 16) {
21977 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21978 } else if (vlen == 32) {
21979 if (UseAVX > 2) {
21980 if (!VM_Version::supports_avx512vl()) {
21981 vlen_enc = Assembler::AVX_512bit;
21982 }
21983 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21984 } else {
21985 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21986 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21987 }
21988 } else { // vlen == 64
21989 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21990 }
21991 break;
21992 case T_FLOAT:
21993 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21994 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21995 break;
21996 case T_DOUBLE:
21997 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21998 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21999 break;
22001 default: assert(false, "%s", type2name(to_elem_bt));
22002 }
22003 %}
22004 ins_pipe( pipe_slow );
22005 %}
22006
22007 instruct vcastFtoD_reg(vec dst, vec src) %{
22008 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22009 match(Set dst (VectorCastF2X src));
22010 format %{ "vector_cast_f2d $dst,$src\t!" %}
22011 ins_encode %{
22012 int vlen_enc = vector_length_encoding(this);
22013 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22014 %}
22015 ins_pipe( pipe_slow );
22016 %}
22018
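// Java's float->integral casts saturate out-of-range values and map NaN to
// zero, whereas x86 cvttps2dq and friends return the "integer indefinite"
// value (e.g. 0x80000000) in both cases; the vector_castF2X_* helpers below
// post-process such lanes, which is what the signflip constant is for.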
22019 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22020 predicate(!VM_Version::supports_avx10_2() &&
22021 !VM_Version::supports_avx512vl() &&
22022 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22023 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22024 is_integral_type(Matcher::vector_element_basic_type(n)));
22025 match(Set dst (VectorCastF2X src));
22026 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22027 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22028 ins_encode %{
22029 int vlen_enc = vector_length_encoding(this, $src);
22030 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load
    // addresses wider than 32 bits in register-indirect addressing mode: stub
    // constants live in the code cache, and there is currently a 2G cap on
    // ReservedCodeCacheSize. Targets are free to raise that limit, but a code
    // cache larger than 2G is unreasonable in practice; on the flip side, the
    // cap saves a temporary register allocation, which in the limiting case
    // can prevent spilling in blocks with high register pressure.
22038 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22039 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22040 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22041 %}
22042 ins_pipe( pipe_slow );
22043 %}
22044
22045 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22046 predicate(!VM_Version::supports_avx10_2() &&
22047 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22048 is_integral_type(Matcher::vector_element_basic_type(n)));
22049 match(Set dst (VectorCastF2X src));
22050 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22051 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22052 ins_encode %{
22053 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22054 if (to_elem_bt == T_LONG) {
22055 int vlen_enc = vector_length_encoding(this);
22056 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22057 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22058 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22059 } else {
22060 int vlen_enc = vector_length_encoding(this, $src);
22061 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22062 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22063 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22064 }
22065 %}
22066 ins_pipe( pipe_slow );
22067 %}
22068
22069 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22070 predicate(VM_Version::supports_avx10_2() &&
22071 is_integral_type(Matcher::vector_element_basic_type(n)));
22072 match(Set dst (VectorCastF2X src));
22073 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22074 ins_encode %{
22075 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22076 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22077 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22078 %}
22079 ins_pipe( pipe_slow );
22080 %}
22081
22082 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22083 predicate(VM_Version::supports_avx10_2() &&
22084 is_integral_type(Matcher::vector_element_basic_type(n)));
22085 match(Set dst (VectorCastF2X (LoadVector src)));
22086 format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22087 ins_encode %{
22088 int vlen = Matcher::vector_length(this);
22089 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22090 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22091 __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22092 %}
22093 ins_pipe( pipe_slow );
22094 %}
22095
22096 instruct vcastDtoF_reg(vec dst, vec src) %{
22097 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22098 match(Set dst (VectorCastD2X src));
22099 format %{ "vector_cast_d2x $dst,$src\t!" %}
22100 ins_encode %{
22101 int vlen_enc = vector_length_encoding(this, $src);
22102 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22103 %}
22104 ins_pipe( pipe_slow );
22105 %}
22106
22107 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22108 predicate(!VM_Version::supports_avx10_2() &&
22109 !VM_Version::supports_avx512vl() &&
22110 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22111 is_integral_type(Matcher::vector_element_basic_type(n)));
22112 match(Set dst (VectorCastD2X src));
22113 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22114 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22115 ins_encode %{
22116 int vlen_enc = vector_length_encoding(this, $src);
22117 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22118 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22119 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22120 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22121 %}
22122 ins_pipe( pipe_slow );
22123 %}
22124
22125 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22126 predicate(!VM_Version::supports_avx10_2() &&
22127 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22128 is_integral_type(Matcher::vector_element_basic_type(n)));
22129 match(Set dst (VectorCastD2X src));
22130 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22131 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22132 ins_encode %{
22133 int vlen_enc = vector_length_encoding(this, $src);
22134 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22135 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22136 ExternalAddress(vector_float_signflip());
22137 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22138 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22139 %}
22140 ins_pipe( pipe_slow );
22141 %}
22142
22143 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22144 predicate(VM_Version::supports_avx10_2() &&
22145 is_integral_type(Matcher::vector_element_basic_type(n)));
22146 match(Set dst (VectorCastD2X src));
22147 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22148 ins_encode %{
22149 int vlen_enc = vector_length_encoding(this, $src);
22150 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22151 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22152 %}
22153 ins_pipe( pipe_slow );
22154 %}
22155
22156 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22157 predicate(VM_Version::supports_avx10_2() &&
22158 is_integral_type(Matcher::vector_element_basic_type(n)));
22159 match(Set dst (VectorCastD2X (LoadVector src)));
22160 format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22161 ins_encode %{
22162 int vlen = Matcher::vector_length(this);
22163 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22164 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22165 __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22166 %}
22167 ins_pipe( pipe_slow );
22168 %}
22169
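// Unsigned (zero-extending) widening casts; vector_unsigned_cast selects the
// matching vpmovzx* form for the given from/to element-type pair.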
22170 instruct vucast(vec dst, vec src) %{
22171 match(Set dst (VectorUCastB2X src));
22172 match(Set dst (VectorUCastS2X src));
22173 match(Set dst (VectorUCastI2X src));
22174 format %{ "vector_ucast $dst,$src\t!" %}
22175 ins_encode %{
22176 assert(UseAVX > 0, "required");
22177
22178 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22179 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22180 int vlen_enc = vector_length_encoding(this);
22181 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22182 %}
22183 ins_pipe( pipe_slow );
22184 %}
22185
22186 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22187 predicate(!VM_Version::supports_avx512vl() &&
22188 Matcher::vector_length_in_bytes(n) < 64 &&
22189 Matcher::vector_element_basic_type(n) == T_INT);
22190 match(Set dst (RoundVF src));
22191 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22192 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22193 ins_encode %{
22194 int vlen_enc = vector_length_encoding(this);
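    // The stub temporarily installs a custom MXCSR: 0x3F80 is the default
    // control word (0x1F80) with the rounding-control field switched to
    // round-down, so round-half-up can be computed as convert(x + 0.5). The
    // EnableX86ECoreOpts variant (0x3FBF) additionally pre-sets the exception
    // flag bits, assumed here to avoid flag-update stalls on E-cores.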
22195 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22196 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22197 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22198 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22199 %}
22200 ins_pipe( pipe_slow );
22201 %}
22202
22203 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22204 predicate((VM_Version::supports_avx512vl() ||
22205 Matcher::vector_length_in_bytes(n) == 64) &&
22206 Matcher::vector_element_basic_type(n) == T_INT);
22207 match(Set dst (RoundVF src));
22208 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22209 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22210 ins_encode %{
22211 int vlen_enc = vector_length_encoding(this);
22212 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22213 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22214 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22215 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22216 %}
22217 ins_pipe( pipe_slow );
22218 %}
22219
22220 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22221 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22222 match(Set dst (RoundVD src));
22223 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22224 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22225 ins_encode %{
22226 int vlen_enc = vector_length_encoding(this);
22227 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22228 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22229 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22230 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22231 %}
22232 ins_pipe( pipe_slow );
22233 %}
22234
22235 // --------------------------------- VectorMaskCmp --------------------------------------
22236
22237 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22238 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22239 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22240 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22241 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22242 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22243 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22244 ins_encode %{
22245 int vlen_enc = vector_length_encoding(this, $src1);
22246 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22247 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22248 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22249 } else {
22250 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22251 }
22252 %}
22253 ins_pipe( pipe_slow );
22254 %}
22255
22256 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22257 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22258 n->bottom_type()->isa_vectmask() == nullptr &&
22259 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22260 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22261 effect(TEMP ktmp);
22262 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22263 ins_encode %{
22264 int vlen_enc = Assembler::AVX_512bit;
22265 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22266 KRegister mask = k0; // The comparison itself is not being masked.
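    // AVX-512 compares produce a kmask rather than a vector result; expand it
    // back into all-ones / all-zeros lanes via a masked load of the
    // all-bits-set constant (merge == false zeroes the unselected lanes).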
22267 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22268 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22269 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22270 } else {
22271 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22272 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22273 }
22274 %}
22275 ins_pipe( pipe_slow );
22276 %}
22277
22278 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22279 predicate(n->bottom_type()->isa_vectmask() &&
22280 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22281 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22282 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22283 ins_encode %{
22284 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22285 int vlen_enc = vector_length_encoding(this, $src1);
22286 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22287 KRegister mask = k0; // The comparison itself is not being masked.
22288 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22289 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22290 } else {
22291 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22292 }
22293 %}
22294 ins_pipe( pipe_slow );
22295 %}
22296
22297 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22298 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22299 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22300 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22301 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22302 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22303 (n->in(2)->get_int() == BoolTest::eq ||
22304 n->in(2)->get_int() == BoolTest::lt ||
22305 n->in(2)->get_int() == BoolTest::gt)); // cond
22306 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22307 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22308 ins_encode %{
22309 int vlen_enc = vector_length_encoding(this, $src1);
22310 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22311 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22312 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22313 %}
22314 ins_pipe( pipe_slow );
22315 %}
22316
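// eq/lt/gt map directly onto the available packed-compare forms; ne/le/ge
// (below) are derived by running the complementary compare and inverting the
// result, hence the extra xtmp temporary.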
22317 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22318 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22319 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22320 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22321 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22322 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22323 (n->in(2)->get_int() == BoolTest::ne ||
22324 n->in(2)->get_int() == BoolTest::le ||
22325 n->in(2)->get_int() == BoolTest::ge)); // cond
22326 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22327 effect(TEMP dst, TEMP xtmp);
22328 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22329 ins_encode %{
22330 int vlen_enc = vector_length_encoding(this, $src1);
22331 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22332 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22333 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22334 %}
22335 ins_pipe( pipe_slow );
22336 %}
22337
22338 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22339 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22340 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22341 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22342 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22343 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22344 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22345 effect(TEMP dst, TEMP xtmp);
22346 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22347 ins_encode %{
22348 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22349 int vlen_enc = vector_length_encoding(this, $src1);
22350 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22351 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22352
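    // There is no unsigned packed compare before AVX-512: XORing both operands
    // with the per-element sign bit maps unsigned order onto signed order,
    // e.g. for bytes 0xFF ^ 0x80 = 127 > 0x00 ^ 0x80 = -128.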
22353 if (vlen_enc == Assembler::AVX_128bit) {
22354 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22355 } else {
22356 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22357 }
22358 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22359 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22360 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22361 %}
22362 ins_pipe( pipe_slow );
22363 %}
22364
22365 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22366 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22367 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22368 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22369 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22370 effect(TEMP ktmp);
22371 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22372 ins_encode %{
22373 assert(UseAVX > 2, "required");
22374
22375 int vlen_enc = vector_length_encoding(this, $src1);
22376 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22377 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22378 KRegister mask = k0; // The comparison itself is not being masked.
22379 bool merge = false;
22380 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22381
22382 switch (src1_elem_bt) {
22383 case T_INT: {
22384 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22385 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22386 break;
22387 }
22388 case T_LONG: {
22389 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22390 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22391 break;
22392 }
22393 default: assert(false, "%s", type2name(src1_elem_bt));
22394 }
22395 %}
22396 ins_pipe( pipe_slow );
22397 %}
22398
22400 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22401 predicate(n->bottom_type()->isa_vectmask() &&
22402 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22403 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22404 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22405 ins_encode %{
22406 assert(UseAVX > 2, "required");
22407 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22408
22409 int vlen_enc = vector_length_encoding(this, $src1);
22410 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22411 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22412 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22413
    // The comparison itself is not being masked (k0).
22415 switch (src1_elem_bt) {
22416 case T_BYTE: {
22417 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22418 break;
22419 }
22420 case T_SHORT: {
22421 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22422 break;
22423 }
22424 case T_INT: {
22425 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22426 break;
22427 }
22428 case T_LONG: {
22429 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22430 break;
22431 }
22432 default: assert(false, "%s", type2name(src1_elem_bt));
22433 }
22434 %}
22435 ins_pipe( pipe_slow );
22436 %}
22437
22438 // Extract
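// Elements within the low 128-bit lane are extracted directly; for 256/512-bit
// vectors, get_lane first moves the 128-bit lane containing the element into a
// temporary, and get_elem then extracts from there.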
22439
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

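// Conceptually, for every lane i:
//   dst[i] = mask[i] ? src2[i] : src1[i]
// where a "true" mask lane is all ones. Note that the SSE4.1 form (pblendvb)
// reads its mask implicitly from xmm0, hence the rxmm0 temp below.
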
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
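    // No variable blend is used on this path; emulate
    //   dst = (mask & src2) | (~mask & src1)
    // with an andnot/and/or triple.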
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
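// Byte/short/int abs map directly to SSSE3 pabs* (or AVX vpabs*). Long abs has
// no pre-AVX-512 instruction, so vabsL_reg requires evpabsq (hence the
// UseAVX > 2 assert) and widens to a 512-bit encoding when AVX512VL is absent.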
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

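// AbsV(F|D) and NegV(F|D) share one instruct each and dispatch on the ideal
// opcode: abs clears the sign bit (AND with a sign mask) and neg flips it
// (XOR), using the "[mask]" constant referenced in the formats.
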
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

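// VectorTest produces only condition flags; the BoolTest predicate attached to
// the node selects the check: alltrue (BoolTest::overflow) or anytrue
// (BoolTest::ne). Vector sources use a ptest-style flag-setting test. Small
// opmask sources are moved to a GPR, trimmed to masklen bits, and compared
// against all-ones (alltrue) or simply ANDed to set ZF (anytrue); larger
// opmasks use kortest directly.
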
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- LoadMask --------------------------------------------

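// VectorLoadMask widens a byte vector of booleans (0/1) into a mask: vector
// destinations get all-ones/all-zero lanes of the element size, while kReg
// destinations get one predicate bit per lane.
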
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

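// VectorStoreMask is the inverse of LoadMask: lane-sized masks are narrowed
// back to a byte vector of 0/1 booleans. The trailing pabsb/vpabsb in each
// sequence maps an all-ones lane (-1) to the canonical true value 1.
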
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "expected a 512-bit mask");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

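// Iota is the constant index sequence {0, 1, 2, ...} loaded from constant
// memory. PopulateIndex builds {start, start+1, ...} by broadcasting the start
// value and adding iota; the step (src2) is only matched for constant 1, as
// the asserts below check.
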
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//-------------------------------- Rearrange ----------------------------------

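// VectorRearrange permutes lanes by index: dst[i] = src[shuffle[i]]. pshufb
// covers byte shuffles within a single 128-bit lane; wider byte shuffles need
// either the cross-lane shuffle-and-blend sequence below or AVX512_VBMI vpermb.
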
// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
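
// For example, a short shuffle index of 3 expands to the byte index pair
// {6, 7} (2*3 and 2*3 + 1), so the byte shuffle reproduces the 16-bit shuffle
// exactly.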

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
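    // vpermw has no 128/256-bit encoding without AVX512VL, so widen the
    // encoding to 512 bits; the extra lanes are don't-care.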
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only
    // a double word shuffle instruction is available on these platforms.

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
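    // The variable form of vpermq has no 128-bit encoding, so a two-element
    // permute is emitted with the 256-bit encoding; the upper lane is unused.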
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c
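// The accumulator c is both an input and the result (match sets c), matching
// the destructive fused-multiply-add instruction forms; the product is not
// rounded before the add.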

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

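// pmaddwd/vpmaddwd multiply adjacent signed-short pairs and sum each pair into
// a 32-bit lane; conceptually:
//   dst_int[i] = src1_s16[2*i] * src2_s16[2*i] + src1_s16[2*i+1] * src2_s16[2*i+1]
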
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

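// With AVX512_VNNI the pmaddwd + paddd pair fuses into a single evpdpwssd:
//   dst_int[i] += src1_s16[2*i] * src2_s16[2*i] + src1_s16[2*i+1] * src2_s16[2*i+1]
// The low ins_cost(10) below biases the matcher toward this fused form.
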
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

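// Per-lane population count. The evex rules use the native AVX-512 vpopcnt*
// instructions where the predicate allows; otherwise the AVX fallback counts
// bits in software (typically a pshufb-driven nibble table lookup).
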
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

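// Per-lane trailing-zero count. A standard reduction is
//   tzcnt(x) = popcount(~x & (x - 1))
// which also yields the element width for x == 0; the EVEX helpers may instead
// isolate the lowest set bit and derive the count via vplzcnt.
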
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

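// vpternlogd evaluates an arbitrary three-input boolean function per bit. The
// imm8 func is a truth table indexed by (a << 2) | (b << 1) | c, where a, b, c
// are the corresponding bits of dst, src2 and src3. For example, func = 0x96
// computes a ^ b ^ c and func = 0xE8 computes the majority function.
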
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
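// Per-lane rotate, following the identity rol(x, s) = (x << s) | (x >>> (w - s))
// for lane width w. The macro-assembler helpers pick native AVX-512 rotate
// instructions where available and otherwise fall back to a shift/or sequence.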
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
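// Masked loads and stores touch only lanes whose mask is set. The AVX path
// uses a vector mask with vmaskmov-style instructions (non-subword elements
// only); the EVEX path uses an opmask register.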
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
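
// vmask_cmp_node above yields -1 when every masked lane compares equal: ktmp1
// (lanes that matched) ORed with ktmp2 (bits outside the mask) is then all
// ones and kortest sets the carry flag. Otherwise the equality mask is
// inverted and the index of the first mismatching lane is recovered with tzcnt.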

instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
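
// The immediate form above materializes the mask as ~0 >> (64 - len), i.e. the
// low len bits set; for example len = 5 yields 0x1F.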

instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
23859 int opcode = this->ideal_Opcode();
23860 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23861 int mask_len = Matcher::vector_length(this, $mask);
23862 int mask_size = mask_len * type2aelembytes(mbt);
23863 int vlen_enc = vector_length_encoding(this, $mask);
23864 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23865 $tmp$$Register, mask_len, mask_size, vlen_enc);
23866 %}
23867 ins_pipe( pipe_slow );
23868 %}
23869
23870 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23871 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23872 match(Set dst (VectorMaskFirstTrue mask));
23873 match(Set dst (VectorMaskLastTrue mask));
23874 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23875 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23876 ins_encode %{
23877 int opcode = this->ideal_Opcode();
23878 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23879 int mask_len = Matcher::vector_length(this, $mask);
23880 int vlen_enc = vector_length_encoding(this, $mask);
23881 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23882 $tmp$$Register, mask_len, mbt, vlen_enc);
23883 %}
23884 ins_pipe( pipe_slow );
23885 %}
23886
23887 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23888 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23889 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23890 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23891 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23892 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23893 ins_encode %{
23894 int opcode = this->ideal_Opcode();
23895 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23896 int mask_len = Matcher::vector_length(this, $mask);
23897 int vlen_enc = vector_length_encoding(this, $mask);
23898 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23899 $tmp$$Register, mask_len, mbt, vlen_enc);
23900 %}
23901 ins_pipe( pipe_slow );
23902 %}
23903
23904 // --------------------------------- Compress/Expand Operations ---------------------------
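// CompressV gathers the lanes selected by the mask into the low-order lanes of
// the destination; ExpandV is the inverse, scattering the low-order source
// lanes out to the positions selected by the mask. With AVX-512VL (or a full
// 64-byte vector) this maps directly onto VPCOMPRESS/VPEXPAND; on AVX2 the
// fallback below builds a permutation from the mask using several temporaries.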
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

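// ReverseV reverses the bit order within each lane. With GFNI a single
// GF2P8AFFINEQB against the 0x8040201008040201 matrix reverses the bits of
// every byte; without it the bits are swapped with shift-and-mask steps.
// Both paths then byte-reverse to finish lanes wider than a byte.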
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------
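// The EVEX forms can count leading zeros directly with VPLZCNTD/Q (AVX512CD);
// subword element types and the pre-AVX512 fallbacks are synthesized from
// narrower building blocks, which is where the extra temporaries come from.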

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
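    // Merge-masked operation: preload dst with src so that lanes with a clear
    // mask bit pass the source lane value through unchanged.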
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
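// In the masked patterns below the destination doubles as the first source
// (Binary dst src2), which matches the destructive AVX-512 instruction form.
// The boolean passed to evmasked_op selects merge masking: with 'true', lanes
// whose mask bit is clear keep their previous dst value instead of being
// zeroed.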
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

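// Variable-shift variants: is_var_shift() means each lane is shifted by the
// corresponding lane of $src2 rather than by one broadcast count, so the
// trailing 'true' asks evmasked_op for the per-lane (VPSLLV/VPSRLV/VPSRAV
// style) encodings.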
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "FMA instruction support is required");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "FMA instruction support is required");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch on the element type. Integral compares translate the bool-test
    // predicate into a signed or unsigned AVX-512 comparison; FP compares use
    // the FP predicate encoding. $mask acts as a zeroing predicate: result
    // bits for unselected lanes come out clear.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

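// Mask negation reaches the matcher as (XorVMask src (MaskAll -1)). For mask
// lengths below 8 the complement must not leak into the unused high bits of
// the k-register, so that variant hands knot() a $ktmp/$rtmp pair (presumably
// to trim the high bits); byte-multiple lengths need no temporaries.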
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

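// VectorLongToMask materializes a mask from a long bit pattern: with EVEX
// predicate masks a single KMOV suffices, while the AVX forms expand the bits
// into a boolean vector via vector_long_to_maskvec (lengths above 8 need an
// extra vector temporary).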
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

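// VPTERNLOG evaluates an arbitrary three-input boolean function: $func is the
// 8-bit truth table, one output bit per combination of the three input bits.
// The masked forms below merge, so unselected lanes keep the old dst value.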
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
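    // VFPCLASS immediate 0x18 selects the +Inf (0x08) and -Inf (0x10) class
    // bits, so $ktmp is non-zero exactly for infinite inputs. The same
    // immediate is used by the double variant below.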
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

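// Saturating add/sub: byte and short lanes map directly onto the hardware
// saturating instructions (the PADDS/PADDUS/PSUBS/PSUBUS families), so the
// subword patterns need no temporaries. 32- and 64-bit lanes have no such
// instructions, so the int/long variants below emulate saturation with
// explicit overflow detection, which is what the extra vector and mask
// temporaries are for.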
24830 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24831 %{
24832 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24833 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24834 match(Set dst (SaturatingAddV src1 src2));
24835 match(Set dst (SaturatingSubV src1 src2));
24836 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24837 ins_encode %{
24838 int vlen_enc = vector_length_encoding(this);
24839 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24840 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24841 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24842 %}
24843 ins_pipe(pipe_slow);
24844 %}
24845
24846 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24847 %{
24848 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24849 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24850 match(Set dst (SaturatingAddV src1 src2));
24851 match(Set dst (SaturatingSubV src1 src2));
24852 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24853 ins_encode %{
24854 int vlen_enc = vector_length_encoding(this);
24855 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24856 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24857 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24858 %}
24859 ins_pipe(pipe_slow);
24860 %}
24861
24862 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24863 %{
24864 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24865 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24866 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24867 match(Set dst (SaturatingAddV src1 src2));
24868 match(Set dst (SaturatingSubV src1 src2));
24869 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24870 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24871 ins_encode %{
24872 int vlen_enc = vector_length_encoding(this);
24873 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24874 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24875 $src1$$XMMRegister, $src2$$XMMRegister,
24876 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24877 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24878 %}
24879 ins_pipe(pipe_slow);
24880 %}
24881
24882 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24883 %{
24884 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24885 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24886 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24887 match(Set dst (SaturatingAddV src1 src2));
24888 match(Set dst (SaturatingSubV src1 src2));
24889 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24890 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24891 ins_encode %{
24892 int vlen_enc = vector_length_encoding(this);
24893 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24894 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24895 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24896 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24897 %}
24898 ins_pipe(pipe_slow);
24899 %}
24900
24901 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24902 %{
24903 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24904 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24905 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24906 match(Set dst (SaturatingAddV src1 src2));
24907 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24908 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24909 ins_encode %{
24910 int vlen_enc = vector_length_encoding(this);
24911 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24912 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24913 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24914 %}
24915 ins_pipe(pipe_slow);
24916 %}
24917
24918 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24919 %{
24920 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24921 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24922 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24923 match(Set dst (SaturatingAddV src1 src2));
24924 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24925 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24926 ins_encode %{
24927 int vlen_enc = vector_length_encoding(this);
24928 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24929 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24930 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24931 %}
24932 ins_pipe(pipe_slow);
24933 %}
24934
24935 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24936 %{
24937 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24938 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24939 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24940 match(Set dst (SaturatingSubV src1 src2));
24941 effect(TEMP ktmp);
24942 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24943 ins_encode %{
24944 int vlen_enc = vector_length_encoding(this);
24945 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24946 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24947 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24948 %}
24949 ins_pipe(pipe_slow);
24950 %}
24951
24952 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24953 %{
24954 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24955 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24956 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24957 match(Set dst (SaturatingSubV src1 src2));
24958 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24959 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24960 ins_encode %{
24961 int vlen_enc = vector_length_encoding(this);
24962 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24963 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24964 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24965 %}
24966 ins_pipe(pipe_slow);
24967 %}
24968
24969 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24970 %{
24971 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24972 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24973 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24974 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24975 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24976 ins_encode %{
24977 int vlen_enc = vector_length_encoding(this);
24978 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24979 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24980 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24981 %}
24982 ins_pipe(pipe_slow);
24983 %}
24984
24985 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24986 %{
24987 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24988 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24989 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24990 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24991 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24992 ins_encode %{
24993 int vlen_enc = vector_length_encoding(this);
24994 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24995 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24996 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24997 %}
24998 ins_pipe(pipe_slow);
24999 %}
25000
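// The masked variants below rely on EVEX merge-masking: lanes whose mask bit
// is clear keep the old destination value, which is why dst is also an input.
// A per-lane sketch:
//   dst[i] = mask[i] ? saturating_op(dst[i], src[i]) : dst[i];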
25001 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25002 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25003 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25004 match(Set dst (SaturatingAddV (Binary dst src) mask));
25005 match(Set dst (SaturatingSubV (Binary dst src) mask));
25006 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25007 ins_encode %{
25008 int vlen_enc = vector_length_encoding(this);
25009 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25010 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25011 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25012 %}
25013 ins_pipe( pipe_slow );
25014 %}
25015
25016 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25017 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25018 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25019 match(Set dst (SaturatingAddV (Binary dst src) mask));
25020 match(Set dst (SaturatingSubV (Binary dst src) mask));
25021 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25022 ins_encode %{
25023 int vlen_enc = vector_length_encoding(this);
25024 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25025 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25026 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25027 %}
25028 ins_pipe( pipe_slow );
25029 %}
25030
25031 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25032 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25033 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25034 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25035 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25036 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25037 ins_encode %{
25038 int vlen_enc = vector_length_encoding(this);
25039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25040 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25041 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25042 %}
25043 ins_pipe( pipe_slow );
25044 %}
25045
25046 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25047 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25048 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25049 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25050 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25051 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25052 ins_encode %{
25053 int vlen_enc = vector_length_encoding(this);
25054 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25055 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25056 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25057 %}
25058 ins_pipe( pipe_slow );
25059 %}
25060
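// SelectFromTwoVector picks each result lane from the concatenation of two
// source vectors. A sketch of the per-lane semantics, where VLEN is the
// element count:
//   index[i] = index[i] < VLEN ? src1[index[i]] : src2[index[i] - VLEN];
// This maps naturally onto the EVEX two-table permutes (vpermi2b/w/d/q),
// which overwrite their index operand, hence index doubling as the result.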
25061 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25062 %{
25063 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25064 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25065 ins_encode %{
25066 int vlen_enc = vector_length_encoding(this);
25067 BasicType bt = Matcher::vector_element_basic_type(this);
25068 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25069 %}
25070 ins_pipe(pipe_slow);
25071 %}
25072
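// Scalar Float16 support. Java carries binary16 values in a short, so the raw
// bit pattern is moved between a GPR and the low 16 bits of an XMM register
// with vmovw (AVX512-FP16); the two reinterpret rules below are just that
// move in each direction:
//   ReinterpretS2HF: xmm[15:0] = gpr[15:0]    ReinterpretHF2S: gpr = xmm[15:0]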
25073 instruct reinterpretS2HF(regF dst, rRegI src)
25074 %{
25075 match(Set dst (ReinterpretS2HF src));
25076 format %{ "vmovw $dst, $src" %}
25077 ins_encode %{
25078 __ vmovw($dst$$XMMRegister, $src$$Register);
25079 %}
25080 ins_pipe(pipe_slow);
25081 %}
25082
25083 instruct reinterpretHF2S(rRegI dst, regF src)
25084 %{
25085 match(Set dst (ReinterpretHF2S src));
25086 format %{ "vmovw $dst, $src" %}
25087 ins_encode %{
25088 __ vmovw($dst$$Register, $src$$XMMRegister);
25089 %}
25090 ins_pipe(pipe_slow);
25091 %}
25092
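// When a float-to-half conversion feeds directly into a reinterpret (or the
// converse), the pair is matched as a single pattern so the value never
// round-trips through a GPR. The 0x04 immediate on vcvtps2ph defers rounding
// to MXCSR.RC, which is round-to-nearest-even unless changed.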
25093 instruct convF2HFAndS2HF(regF dst, regF src)
25094 %{
25095 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25096 format %{ "convF2HFAndS2HF $dst, $src" %}
25097 ins_encode %{
25098 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25099 %}
25100 ins_pipe(pipe_slow);
25101 %}
25102
25103 instruct convHF2SAndHF2F(regF dst, regF src)
25104 %{
25105 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25106 format %{ "convHF2SAndHF2F $dst, $src" %}
25107 ins_encode %{
25108 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25109 %}
25110 ins_pipe(pipe_slow);
25111 %}
25112
25113 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25114 %{
25115 match(Set dst (SqrtHF src));
25116 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25117 ins_encode %{
25118 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25119 %}
25120 ins_pipe(pipe_slow);
25121 %}
25122
25123 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25124 %{
25125 match(Set dst (AddHF src1 src2));
25126 match(Set dst (DivHF src1 src2));
25127 match(Set dst (MulHF src1 src2));
25128 match(Set dst (SubHF src1 src2));
25129 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25130 ins_encode %{
25131 int opcode = this->ideal_Opcode();
25132 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25133 %}
25134 ins_pipe(pipe_slow);
25135 %}
25136
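// Float16 min/max must follow Java semantics: a NaN in either input produces
// NaN, and -0.0 orders below +0.0. A sketch for max (min is symmetric):
//   maxHF(a, b) = isNaN(a) || isNaN(b)   ? NaN
//               : (a == 0.0 && b == 0.0) ? (is_negative(a) ? b : a)
//               : (a > b ? a : b);
// AVX10.2 encodes these rules directly in the vminmaxsh/vminmaxph immediate;
// older targets synthesize them below using a mask register and two temps.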
25137 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25138 %{
25139 predicate(VM_Version::supports_avx10_2());
25140 match(Set dst (MaxHF src1 src2));
25141 match(Set dst (MinHF src1 src2));
25142 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25143 ins_encode %{
25144 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25145 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25146 %}
25147 ins_pipe( pipe_slow );
25148 %}
25149
25150 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25151 %{
25152 predicate(!VM_Version::supports_avx10_2());
25153 match(Set dst (MaxHF src1 src2));
25154 match(Set dst (MinHF src1 src2));
25155 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25156 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25157 ins_encode %{
25158 int opcode = this->ideal_Opcode();
25159 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25160 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25161 %}
25162 ins_pipe( pipe_slow );
25163 %}
25164
25165 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25166 %{
25167 match(Set dst (FmaHF src2 (Binary dst src1)));
25168 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2" %}
25170 ins_encode %{
25171 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25172 %}
25173 ins_pipe( pipe_slow );
25174 %}
25175
25176
25177 instruct vector_sqrt_HF_reg(vec dst, vec src)
25178 %{
25179 match(Set dst (SqrtVHF src));
25180 format %{ "vector_sqrt_fp16 $dst, $src" %}
25181 ins_encode %{
25182 int vlen_enc = vector_length_encoding(this);
25183 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25184 %}
25185 ins_pipe(pipe_slow);
25186 %}
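// The memory forms below match through (VectorReinterpret (LoadVector ...))
// since Float16 vector values reach the matcher as short-vector loads
// reinterpreted to half-float; folding that pair into the address operand
// avoids materializing the load in a separate register.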
25187
25188 instruct vector_sqrt_HF_mem(vec dst, memory src)
25189 %{
25190 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25191 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25192 ins_encode %{
25193 int vlen_enc = vector_length_encoding(this);
25194 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25195 %}
25196 ins_pipe(pipe_slow);
25197 %}
25198
25199 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25200 %{
25201 match(Set dst (AddVHF src1 src2));
25202 match(Set dst (DivVHF src1 src2));
25203 match(Set dst (MulVHF src1 src2));
25204 match(Set dst (SubVHF src1 src2));
25205 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25206 ins_encode %{
25207 int vlen_enc = vector_length_encoding(this);
25208 int opcode = this->ideal_Opcode();
25209 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25210 %}
25211 ins_pipe(pipe_slow);
25212 %}
25213
25214
25215 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25216 %{
25217 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25218 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25219 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25220 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25221 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25222 ins_encode %{
25223 int vlen_enc = vector_length_encoding(this);
25224 int opcode = this->ideal_Opcode();
25225 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25226 %}
25227 ins_pipe(pipe_slow);
25228 %}
25229
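// Operand order note: the 132 form of FMA computes dst = dst * src3 + src2,
// so passing $src2 as the addend implements dst = dst * src1 + src2. Roughly:
//   evfmadd132ph(dst, b, c)  =>  for each lane: dst = dst * c + b;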
25230 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25231 %{
25232 match(Set dst (FmaVHF src2 (Binary dst src1)));
25233 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25234 ins_encode %{
25235 int vlen_enc = vector_length_encoding(this);
25236 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25237 %}
25238 ins_pipe( pipe_slow );
25239 %}
25240
25241 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25242 %{
25243 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25244 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25245 ins_encode %{
25246 int vlen_enc = vector_length_encoding(this);
25247 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25248 %}
25249 ins_pipe( pipe_slow );
25250 %}
25251
25252 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25253 %{
25254 predicate(VM_Version::supports_avx10_2());
25255 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25256 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25257 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25258 ins_encode %{
25259 int vlen_enc = vector_length_encoding(this);
25260 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25261 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25262 %}
25263 ins_pipe( pipe_slow );
25264 %}
25265
25266 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25267 %{
25268 predicate(VM_Version::supports_avx10_2());
25269 match(Set dst (MinVHF src1 src2));
25270 match(Set dst (MaxVHF src1 src2));
25271 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25272 ins_encode %{
25273 int vlen_enc = vector_length_encoding(this);
25274 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25275 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25276 %}
25277 ins_pipe( pipe_slow );
25278 %}
25279
25280 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25281 %{
25282 predicate(!VM_Version::supports_avx10_2());
25283 match(Set dst (MinVHF src1 src2));
25284 match(Set dst (MaxVHF src1 src2));
25285 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25286 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25287 ins_encode %{
25288 int vlen_enc = vector_length_encoding(this);
25289 int opcode = this->ideal_Opcode();
25290 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25291 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25292 %}
25293 ins_pipe( pipe_slow );
25294 %}
25295
25296 //----------PEEPHOLE RULES-----------------------------------------------------
25297 // These must follow all instruction definitions as they use the names
25298 // defined in the instructions definitions.
25299 //
25300 // peeppredicate ( rule_predicate );
// // the rule is applied only when this predicate evaluates to true;
// // otherwise the peephole rule is ignored
25302 //
25303 // peepmatch ( root_instr_name [preceding_instruction]* );
25304 //
25305 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure must reside in the architecture-dependent peephole file and has
// // the signature bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, a function that returns a new node (as
// // defined in peepreplace) when invoked, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // the transformation succeeded, else false.
25314 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
25319 //
25320 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25321 // // provide an instruction_number.operand_name for each operand that appears
25322 // // in the replacement instruction's match rule
25323 //
25324 // ---------VM FLAGS---------------------------------------------------------
25325 //
25326 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25327 //
25328 // Each peephole rule is given an identifying number starting with zero and
25329 // increasing by one in the order seen by the parser. An individual peephole
25330 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25331 // on the command-line.
25332 //
25333 // ---------CURRENT LIMITATIONS----------------------------------------------
25334 //
// Only transformations inside a basic block are supported (do we need more
// for peephole?)
25336 //
25337 // ---------EXAMPLE----------------------------------------------------------
25338 //
25339 // // pertinent parts of existing instructions in architecture description
25340 // instruct movI(rRegI dst, rRegI src)
25341 // %{
25342 // match(Set dst (CopyI src));
25343 // %}
25344 //
25345 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25346 // %{
25347 // match(Set dst (AddI dst src));
25348 // effect(KILL cr);
25349 // %}
25350 //
25351 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25352 // %{
25353 // match(Set dst (AddI dst src));
25354 // %}
25355 //
25356 // 1. Simple replacement
25357 // - Only match adjacent instructions in same basic block
25358 // - Only equality constraints
25359 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25360 // - Only one replacement instruction
25361 //
25362 // // Change (inc mov) to lea
25363 // peephole %{
25364 // // lea should only be emitted when beneficial
25365 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25366 // // increment preceded by register-register move
25367 // peepmatch ( incI_rReg movI );
25368 // // require that the destination register of the increment
25369 // // match the destination register of the move
25370 // peepconstraint ( 0.dst == 1.dst );
25371 // // construct a replacement instruction that sets
25372 // // the destination to ( move's source register + one )
25373 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25374 // %}
25375 //
25376 // 2. Procedural replacement
// - More flexible matching of relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing limitations
25382 //
25383 // // Change (inc mov) to lea
25384 // peephole %{
25385 // // lea should only be emitted when beneficial
25386 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the procedure below
// peepmatch ( incI_rReg movI );
// // the procedure responsible for performing the transformation
// peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that creates this node when
// // invoked is passed into the procedure above
25393 // peepreplace ( leaI_rReg_immI() );
25394 // %}
25395
// These instructions are not matched by the matcher directly but are used by
// the peephole rules below
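// Encoding note: with a SIB byte, rbp and r13 cannot be encoded as a base
// register under mod == 0b00 (that combination means disp32 with no base), so
// using either as the base would cost an extra displacement byte. Since
// [src1 + src2] is commutative at scale 1, the rules below swap such a
// register into the index position instead, e.g.:
//   leal dst, [rbp + rcx]  is emitted as  leal dst, [rcx + rbp*1]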
25397 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25398 %{
25399 predicate(false);
25400 match(Set dst (AddI src1 src2));
25401 format %{ "leal $dst, [$src1 + $src2]" %}
25402 ins_encode %{
25403 Register dst = $dst$$Register;
25404 Register src1 = $src1$$Register;
25405 Register src2 = $src2$$Register;
25406 if (src1 != rbp && src1 != r13) {
25407 __ leal(dst, Address(src1, src2, Address::times_1));
25408 } else {
      assert(src2 != rbp && src2 != r13, "one operand must be encodable as a base register");
25410 __ leal(dst, Address(src2, src1, Address::times_1));
25411 }
25412 %}
25413 ins_pipe(ialu_reg_reg);
25414 %}
25415
25416 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25417 %{
25418 predicate(false);
25419 match(Set dst (AddI src1 src2));
25420 format %{ "leal $dst, [$src1 + $src2]" %}
25421 ins_encode %{
25422 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25423 %}
25424 ins_pipe(ialu_reg_reg);
25425 %}
25426
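// Encoding note: a scaled index with no base register always carries a 4-byte
// displacement, so x << 1 encodes more compactly as base + index*1. The shift
// rules below therefore rewrite scale 2 as [src + src] whenever src is legal
// as a base:
//   shift == 1 && src usable as base  =>  lea dst, [src + src]
//   otherwise                         =>  lea dst, [src << shift]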
25427 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25428 %{
25429 predicate(false);
25430 match(Set dst (LShiftI src shift));
25431 format %{ "leal $dst, [$src << $shift]" %}
25432 ins_encode %{
25433 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25434 Register src = $src$$Register;
25435 if (scale == Address::times_2 && src != rbp && src != r13) {
25436 __ leal($dst$$Register, Address(src, src, Address::times_1));
25437 } else {
25438 __ leal($dst$$Register, Address(noreg, src, scale));
25439 }
25440 %}
25441 ins_pipe(ialu_reg_reg);
25442 %}
25443
25444 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25445 %{
25446 predicate(false);
25447 match(Set dst (AddL src1 src2));
25448 format %{ "leaq $dst, [$src1 + $src2]" %}
25449 ins_encode %{
25450 Register dst = $dst$$Register;
25451 Register src1 = $src1$$Register;
25452 Register src2 = $src2$$Register;
25453 if (src1 != rbp && src1 != r13) {
25454 __ leaq(dst, Address(src1, src2, Address::times_1));
25455 } else {
      assert(src2 != rbp && src2 != r13, "one operand must be encodable as a base register");
25457 __ leaq(dst, Address(src2, src1, Address::times_1));
25458 }
25459 %}
25460 ins_pipe(ialu_reg_reg);
25461 %}
25462
25463 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25464 %{
25465 predicate(false);
25466 match(Set dst (AddL src1 src2));
25467 format %{ "leaq $dst, [$src1 + $src2]" %}
25468 ins_encode %{
25469 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25470 %}
25471 ins_pipe(ialu_reg_reg);
25472 %}
25473
25474 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25475 %{
25476 predicate(false);
25477 match(Set dst (LShiftL src shift));
25478 format %{ "leaq $dst, [$src << $shift]" %}
25479 ins_encode %{
25480 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25481 Register src = $src$$Register;
25482 if (scale == Address::times_2 && src != rbp && src != r13) {
25483 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25484 } else {
25485 __ leaq($dst$$Register, Address(noreg, src, scale));
25486 }
25487 %}
25488 ins_pipe(ialu_reg_reg);
25489 %}
25490
25491 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) or on Intel Cascade Lake.
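//
// For example (a sketch; registers illustrative), lea_coalesce_reg rewrites
//   movl ecx, eax
//   addl ecx, ebx
// into
//   leal ecx, [eax + ebx]
// collapsing the dependent move + add pair into a single instruction.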
25497
25498 peephole
25499 %{
25500 peeppredicate(VM_Version::supports_fast_2op_lea());
25501 peepmatch (addI_rReg);
25502 peepprocedure (lea_coalesce_reg);
25503 peepreplace (leaI_rReg_rReg_peep());
25504 %}
25505
25506 peephole
25507 %{
25508 peeppredicate(VM_Version::supports_fast_2op_lea());
25509 peepmatch (addI_rReg_imm);
25510 peepprocedure (lea_coalesce_imm);
25511 peepreplace (leaI_rReg_immI_peep());
25512 %}
25513
25514 peephole
25515 %{
25516 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25517 VM_Version::is_intel_cascade_lake());
25518 peepmatch (incI_rReg);
25519 peepprocedure (lea_coalesce_imm);
25520 peepreplace (leaI_rReg_immI_peep());
25521 %}
25522
25523 peephole
25524 %{
25525 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25526 VM_Version::is_intel_cascade_lake());
25527 peepmatch (decI_rReg);
25528 peepprocedure (lea_coalesce_imm);
25529 peepreplace (leaI_rReg_immI_peep());
25530 %}
25531
25532 peephole
25533 %{
25534 peeppredicate(VM_Version::supports_fast_2op_lea());
25535 peepmatch (salI_rReg_immI2);
25536 peepprocedure (lea_coalesce_imm);
25537 peepreplace (leaI_rReg_immI2_peep());
25538 %}
25539
25540 peephole
25541 %{
25542 peeppredicate(VM_Version::supports_fast_2op_lea());
25543 peepmatch (addL_rReg);
25544 peepprocedure (lea_coalesce_reg);
25545 peepreplace (leaL_rReg_rReg_peep());
25546 %}
25547
25548 peephole
25549 %{
25550 peeppredicate(VM_Version::supports_fast_2op_lea());
25551 peepmatch (addL_rReg_imm);
25552 peepprocedure (lea_coalesce_imm);
25553 peepreplace (leaL_rReg_immL32_peep());
25554 %}
25555
25556 peephole
25557 %{
25558 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25559 VM_Version::is_intel_cascade_lake());
25560 peepmatch (incL_rReg);
25561 peepprocedure (lea_coalesce_imm);
25562 peepreplace (leaL_rReg_immL32_peep());
25563 %}
25564
25565 peephole
25566 %{
25567 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25568 VM_Version::is_intel_cascade_lake());
25569 peepmatch (decL_rReg);
25570 peepprocedure (lea_coalesce_imm);
25571 peepreplace (leaL_rReg_immL32_peep());
25572 %}
25573
25574 peephole
25575 %{
25576 peeppredicate(VM_Version::supports_fast_2op_lea());
25577 peepmatch (salL_rReg_immI2);
25578 peepprocedure (lea_coalesce_imm);
25579 peepreplace (leaL_rReg_immI2_peep());
25580 %}
25581
25582 peephole
25583 %{
25584 peepmatch (leaPCompressedOopOffset);
25585 peepprocedure (lea_remove_redundant);
25586 %}
25587
25588 peephole
25589 %{
25590 peepmatch (leaP8Narrow);
25591 peepprocedure (lea_remove_redundant);
25592 %}
25593
25594 peephole
25595 %{
25596 peepmatch (leaP32Narrow);
25597 peepprocedure (lea_remove_redundant);
25598 %}
25599
// These peephole rules match instructions that set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) use
// only flags that are already set by the preceding instruction.
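//
// For example (a sketch), andl already sets ZF/SF from its result exactly as
// testl would, so
//   andl  eax, ebx
//   testl eax, eax
//   je    done
// can drop the redundant test:
//   andl  eax, ebx
//   je    done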
25602
// int variant
25604 peephole
25605 %{
25606 peepmatch (testI_reg);
25607 peepprocedure (test_may_remove);
25608 %}
25609
// long variant
25611 peephole
25612 %{
25613 peepmatch (testL_reg);
25614 peepprocedure (test_may_remove);
25615 %}
25616
25617
25618 //----------SMARTSPILL RULES---------------------------------------------------
25619 // These must follow all instruction definitions as they use the names
25620 // defined in the instructions definitions.